pub struct MultilingualProcessor { /* private fields */ }
Expand description
Language-specific text processor
Implementations§
Source§impl MultilingualProcessor
impl MultilingualProcessor
Source pub fn new() -> Self
pub fn new() -> Self
Create a new multilingual processor
Examples found in repository?
examples/multilingual_demo.rs (line 94)
6fn main() -> Result<(), Box<dyn std::error::Error>> {
7 println!("Multilingual Text Processing Demo");
8 println!("================================\n");
9
10 // Create language detector
11 let detector = LanguageDetector::new();
12
13 // Example texts in different languages
14 let texts = vec![
15 (
16 "This is a sample text in English to test language detection.",
17 "English",
18 ),
19 (
20 "Este es un texto de ejemplo en español para probar la detección de idioma.",
21 "Spanish",
22 ),
23 (
24 "Ceci est un exemple de texte en français pour tester la détection de langue.",
25 "French",
26 ),
27 (
28 "Dies ist ein Beispieltext auf Deutsch zum Testen der Spracherkennung.",
29 "German",
30 ),
31 ("The quick brown fox jumps over the lazy dog.", "English"),
32 (
33 "El rápido zorro marrón salta sobre el perro perezoso.",
34 "Spanish",
35 ),
36 (
37 "Le rapide renard brun saute par-dessus le chien paresseux.",
38 "French",
39 ),
40 (
41 "Der schnelle braune Fuchs springt über den faulen Hund.",
42 "German",
43 ),
44 ];
45
46 println!("Language Detection:");
47 println!("==================");
48
49 for (text, expected) in &texts {
50 let result = detector.detect(text)?;
51 println!("\nText: \"{text}\"");
52 println!("Expected: {expected}");
53 println!(
54 "Detected: {} (confidence: {:.2}%)",
55 result.language.name(),
56 result.confidence * 100.0
57 );
58
59 if !result.alternatives.is_empty() {
60 println!("Alternatives:");
61 for (lang, score) in &result.alternatives {
62 println!(" - {}: {:.2}%", lang.name(), score * 100.0);
63 }
64 }
65 }
66
67 // Demonstrate stop words functionality
68 println!("\n\nStop Words Processing:");
69 println!("=====================");
70
71 let stop_words = StopWords::new();
72
73 let test_sentences = vec![
74 ("The cat is on the mat", Language::English),
75 ("Le chat est sur le tapis", Language::French),
76 ("El gato está en la alfombra", Language::Spanish),
77 ];
78
79 for (sentence, language) in &test_sentences {
80 let tokens: Vec<String> = sentence.split_whitespace().map(|s| s.to_string()).collect();
81
82 let filtered = stop_words.remove_stop_words(&tokens, *language);
83
84 println!("\nLanguage: {}", language.name());
85 println!("Original: {sentence}");
86 println!("Tokens: {tokens:?}");
87 println!("Without stop words: {filtered:?}");
88 }
89
90 // Demonstrate multilingual processor
91 println!("\n\nMultilingual Processor:");
92 println!("======================");
93
94 let processor = MultilingualProcessor::new();
95
96 let mixedtexts = vec![
97 "Machine learning algorithms are transforming artificial intelligence",
98 "Los algoritmos de aprendizaje automático están transformando la inteligencia artificial",
99 "Les algorithmes d'apprentissage automatique transforment l'intelligence artificielle",
100 ];
101
102 for text in &mixedtexts {
103 let result = processor.process(text)?;
104
105 println!("\nOriginal: \"{text}\"");
106 println!(
107 "Detected Language: {} (confidence: {:.2}%)",
108 result.language.name(),
109 result.confidence * 100.0
110 );
111 println!("Tokens: {} total", result.tokens.len());
112 println!(
113 "Filtered Tokens: {} after stop word removal",
114 result.filtered_tokens.len()
115 );
116
117 if result.filtered_tokens.len() <= 5 {
118 println!("Filtered: {:?}", result.filtered_tokens);
119 } else {
120 println!("First 5 filtered: {:?}...", &result.filtered_tokens[..5]);
121 }
122 }
123
124 // Language code conversions
125 println!("\n\nLanguage Code Conversions:");
126 println!("=========================");
127
128 let languages = vec![
129 Language::English,
130 Language::Spanish,
131 Language::French,
132 Language::German,
133 Language::Chinese,
134 Language::Japanese,
135 ];
136
137 for lang in &languages {
138 println!("{}: ISO code = {}", lang.name(), lang.iso_code());
139 }
140
141 println!("\nReverse lookup:");
142 let codes = vec!["en", "es", "fr", "de", "zh", "ja", "xx"];
143 for code in &codes {
144 let lang = Language::from_iso_code(code);
145 println!("{} -> {}", code, lang.name());
146 }
147
148 Ok(())
149}
Source pub fn process(&self, text: &str) -> Result<ProcessedText>
pub fn process(&self, text: &str) -> Result<ProcessedText>
Process text with automatic language detection
Examples found in repository?
examples/multilingual_demo.rs (line 103)
6fn main() -> Result<(), Box<dyn std::error::Error>> {
7 println!("Multilingual Text Processing Demo");
8 println!("================================\n");
9
10 // Create language detector
11 let detector = LanguageDetector::new();
12
13 // Example texts in different languages
14 let texts = vec![
15 (
16 "This is a sample text in English to test language detection.",
17 "English",
18 ),
19 (
20 "Este es un texto de ejemplo en español para probar la detección de idioma.",
21 "Spanish",
22 ),
23 (
24 "Ceci est un exemple de texte en français pour tester la détection de langue.",
25 "French",
26 ),
27 (
28 "Dies ist ein Beispieltext auf Deutsch zum Testen der Spracherkennung.",
29 "German",
30 ),
31 ("The quick brown fox jumps over the lazy dog.", "English"),
32 (
33 "El rápido zorro marrón salta sobre el perro perezoso.",
34 "Spanish",
35 ),
36 (
37 "Le rapide renard brun saute par-dessus le chien paresseux.",
38 "French",
39 ),
40 (
41 "Der schnelle braune Fuchs springt über den faulen Hund.",
42 "German",
43 ),
44 ];
45
46 println!("Language Detection:");
47 println!("==================");
48
49 for (text, expected) in &texts {
50 let result = detector.detect(text)?;
51 println!("\nText: \"{text}\"");
52 println!("Expected: {expected}");
53 println!(
54 "Detected: {} (confidence: {:.2}%)",
55 result.language.name(),
56 result.confidence * 100.0
57 );
58
59 if !result.alternatives.is_empty() {
60 println!("Alternatives:");
61 for (lang, score) in &result.alternatives {
62 println!(" - {}: {:.2}%", lang.name(), score * 100.0);
63 }
64 }
65 }
66
67 // Demonstrate stop words functionality
68 println!("\n\nStop Words Processing:");
69 println!("=====================");
70
71 let stop_words = StopWords::new();
72
73 let test_sentences = vec![
74 ("The cat is on the mat", Language::English),
75 ("Le chat est sur le tapis", Language::French),
76 ("El gato está en la alfombra", Language::Spanish),
77 ];
78
79 for (sentence, language) in &test_sentences {
80 let tokens: Vec<String> = sentence.split_whitespace().map(|s| s.to_string()).collect();
81
82 let filtered = stop_words.remove_stop_words(&tokens, *language);
83
84 println!("\nLanguage: {}", language.name());
85 println!("Original: {sentence}");
86 println!("Tokens: {tokens:?}");
87 println!("Without stop words: {filtered:?}");
88 }
89
90 // Demonstrate multilingual processor
91 println!("\n\nMultilingual Processor:");
92 println!("======================");
93
94 let processor = MultilingualProcessor::new();
95
96 let mixedtexts = vec![
97 "Machine learning algorithms are transforming artificial intelligence",
98 "Los algoritmos de aprendizaje automático están transformando la inteligencia artificial",
99 "Les algorithmes d'apprentissage automatique transforment l'intelligence artificielle",
100 ];
101
102 for text in &mixedtexts {
103 let result = processor.process(text)?;
104
105 println!("\nOriginal: \"{text}\"");
106 println!(
107 "Detected Language: {} (confidence: {:.2}%)",
108 result.language.name(),
109 result.confidence * 100.0
110 );
111 println!("Tokens: {} total", result.tokens.len());
112 println!(
113 "Filtered Tokens: {} after stop word removal",
114 result.filtered_tokens.len()
115 );
116
117 if result.filtered_tokens.len() <= 5 {
118 println!("Filtered: {:?}", result.filtered_tokens);
119 } else {
120 println!("First 5 filtered: {:?}...", &result.filtered_tokens[..5]);
121 }
122 }
123
124 // Language code conversions
125 println!("\n\nLanguage Code Conversions:");
126 println!("=========================");
127
128 let languages = vec![
129 Language::English,
130 Language::Spanish,
131 Language::French,
132 Language::German,
133 Language::Chinese,
134 Language::Japanese,
135 ];
136
137 for lang in &languages {
138 println!("{}: ISO code = {}", lang.name(), lang.iso_code());
139 }
140
141 println!("\nReverse lookup:");
142 let codes = vec!["en", "es", "fr", "de", "zh", "ja", "xx"];
143 for code in &codes {
144 let lang = Language::from_iso_code(code);
145 println!("{} -> {}", code, lang.name());
146 }
147
148 Ok(())
149}
Trait Implementations§
Auto Trait Implementations§
impl Freeze for MultilingualProcessor
impl RefUnwindSafe for MultilingualProcessor
impl Send for MultilingualProcessor
impl Sync for MultilingualProcessor
impl Unpin for MultilingualProcessor
impl UnwindSafe for MultilingualProcessor
Blanket Implementations§
Source§impl<T> BorrowMut<T> for T
where
T: ?Sized,
impl<T> BorrowMut<T> for T
where
T: ?Sized,
Source§fn borrow_mut(&mut self) -> &mut T
fn borrow_mut(&mut self) -> &mut T
Mutably borrows from an owned value. Read more
Source§impl<T> IntoEither for T
impl<T> IntoEither for T
Source§fn into_either(self, into_left: bool) -> Either<Self, Self>
fn into_either(self, into_left: bool) -> Either<Self, Self>
Converts self into a Left variant of Either<Self, Self> if into_left is true.
Converts self into a Right variant of Either<Self, Self> otherwise. Read more
Source§fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
Converts self into a Left variant of Either<Self, Self> if into_left(&self) returns true.
Converts self into a Right variant of Either<Self, Self> otherwise. Read more
Source§impl<T> Pointable for T
impl<T> Pointable for T
Source§impl<SS, SP> SupersetOf<SS> for SP
where
SS: SubsetOf<SP>,
impl<SS, SP> SupersetOf<SS> for SP
where
SS: SubsetOf<SP>,
Source§fn to_subset(&self) -> Option<SS>
fn to_subset(&self) -> Option<SS>
The inverse inclusion map: attempts to construct self from the equivalent element of its superset. Read more
Source§fn is_in_subset(&self) -> bool
fn is_in_subset(&self) -> bool
Checks if self is actually part of its subset T (and can be converted to it).
Source§fn to_subset_unchecked(&self) -> SS
fn to_subset_unchecked(&self) -> SS
Use with care! Same as self.to_subset but without any property checks. Always succeeds.
Source§fn from_subset(element: &SS) -> SP
fn from_subset(element: &SS) -> SP
The inclusion map: converts self to the equivalent element of its superset.