MultilingualProcessor

Struct MultilingualProcessor 

Source
pub struct MultilingualProcessor { /* private fields */ }
Expand description

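Multilingual text processor: automatically detects the language of a text and applies language-specific processing such as stop-word removal.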

Implementations§

Source§

impl MultilingualProcessor

Source

pub fn new() -> Self

Create a new multilingual processor

Examples found in repository?
examples/multilingual_demo.rs (line 94)
6fn main() -> Result<(), Box<dyn std::error::Error>> {
7    println!("Multilingual Text Processing Demo");
8    println!("================================\n");
9
10    // Create language detector
11    let detector = LanguageDetector::new();
12
13    // Example texts in different languages
14    let texts = vec![
15        (
16            "This is a sample text in English to test language detection.",
17            "English",
18        ),
19        (
20            "Este es un texto de ejemplo en español para probar la detección de idioma.",
21            "Spanish",
22        ),
23        (
24            "Ceci est un exemple de texte en français pour tester la détection de langue.",
25            "French",
26        ),
27        (
28            "Dies ist ein Beispieltext auf Deutsch zum Testen der Spracherkennung.",
29            "German",
30        ),
31        ("The quick brown fox jumps over the lazy dog.", "English"),
32        (
33            "El rápido zorro marrón salta sobre el perro perezoso.",
34            "Spanish",
35        ),
36        (
37            "Le rapide renard brun saute par-dessus le chien paresseux.",
38            "French",
39        ),
40        (
41            "Der schnelle braune Fuchs springt über den faulen Hund.",
42            "German",
43        ),
44    ];
45
46    println!("Language Detection:");
47    println!("==================");
48
49    for (text, expected) in &texts {
50        let result = detector.detect(text)?;
51        println!("\nText: \"{text}\"");
52        println!("Expected: {expected}");
53        println!(
54            "Detected: {} (confidence: {:.2}%)",
55            result.language.name(),
56            result.confidence * 100.0
57        );
58
59        if !result.alternatives.is_empty() {
60            println!("Alternatives:");
61            for (lang, score) in &result.alternatives {
62                println!("  - {}: {:.2}%", lang.name(), score * 100.0);
63            }
64        }
65    }
66
67    // Demonstrate stop words functionality
68    println!("\n\nStop Words Processing:");
69    println!("=====================");
70
71    let stop_words = StopWords::new();
72
73    let test_sentences = vec![
74        ("The cat is on the mat", Language::English),
75        ("Le chat est sur le tapis", Language::French),
76        ("El gato está en la alfombra", Language::Spanish),
77    ];
78
79    for (sentence, language) in &test_sentences {
80        let tokens: Vec<String> = sentence.split_whitespace().map(|s| s.to_string()).collect();
81
82        let filtered = stop_words.remove_stop_words(&tokens, *language);
83
84        println!("\nLanguage: {}", language.name());
85        println!("Original: {sentence}");
86        println!("Tokens: {tokens:?}");
87        println!("Without stop words: {filtered:?}");
88    }
89
90    // Demonstrate multilingual processor
91    println!("\n\nMultilingual Processor:");
92    println!("======================");
93
94    let processor = MultilingualProcessor::new();
95
96    let mixedtexts = vec![
97        "Machine learning algorithms are transforming artificial intelligence",
98        "Los algoritmos de aprendizaje automático están transformando la inteligencia artificial",
99        "Les algorithmes d'apprentissage automatique transforment l'intelligence artificielle",
100    ];
101
102    for text in &mixedtexts {
103        let result = processor.process(text)?;
104
105        println!("\nOriginal: \"{text}\"");
106        println!(
107            "Detected Language: {} (confidence: {:.2}%)",
108            result.language.name(),
109            result.confidence * 100.0
110        );
111        println!("Tokens: {} total", result.tokens.len());
112        println!(
113            "Filtered Tokens: {} after stop word removal",
114            result.filtered_tokens.len()
115        );
116
117        if result.filtered_tokens.len() <= 5 {
118            println!("Filtered: {:?}", result.filtered_tokens);
119        } else {
120            println!("First 5 filtered: {:?}...", &result.filtered_tokens[..5]);
121        }
122    }
123
124    // Language code conversions
125    println!("\n\nLanguage Code Conversions:");
126    println!("=========================");
127
128    let languages = vec![
129        Language::English,
130        Language::Spanish,
131        Language::French,
132        Language::German,
133        Language::Chinese,
134        Language::Japanese,
135    ];
136
137    for lang in &languages {
138        println!("{}: ISO code = {}", lang.name(), lang.iso_code());
139    }
140
141    println!("\nReverse lookup:");
142    let codes = vec!["en", "es", "fr", "de", "zh", "ja", "xx"];
143    for code in &codes {
144        let lang = Language::from_iso_code(code);
145        println!("{} -> {}", code, lang.name());
146    }
147
148    Ok(())
149}
Source

pub fn process(&self, text: &str) -> Result<ProcessedText>

Process text with automatic language detection

Examples found in repository?
examples/multilingual_demo.rs (line 103)
6fn main() -> Result<(), Box<dyn std::error::Error>> {
7    println!("Multilingual Text Processing Demo");
8    println!("================================\n");
9
10    // Create language detector
11    let detector = LanguageDetector::new();
12
13    // Example texts in different languages
14    let texts = vec![
15        (
16            "This is a sample text in English to test language detection.",
17            "English",
18        ),
19        (
20            "Este es un texto de ejemplo en español para probar la detección de idioma.",
21            "Spanish",
22        ),
23        (
24            "Ceci est un exemple de texte en français pour tester la détection de langue.",
25            "French",
26        ),
27        (
28            "Dies ist ein Beispieltext auf Deutsch zum Testen der Spracherkennung.",
29            "German",
30        ),
31        ("The quick brown fox jumps over the lazy dog.", "English"),
32        (
33            "El rápido zorro marrón salta sobre el perro perezoso.",
34            "Spanish",
35        ),
36        (
37            "Le rapide renard brun saute par-dessus le chien paresseux.",
38            "French",
39        ),
40        (
41            "Der schnelle braune Fuchs springt über den faulen Hund.",
42            "German",
43        ),
44    ];
45
46    println!("Language Detection:");
47    println!("==================");
48
49    for (text, expected) in &texts {
50        let result = detector.detect(text)?;
51        println!("\nText: \"{text}\"");
52        println!("Expected: {expected}");
53        println!(
54            "Detected: {} (confidence: {:.2}%)",
55            result.language.name(),
56            result.confidence * 100.0
57        );
58
59        if !result.alternatives.is_empty() {
60            println!("Alternatives:");
61            for (lang, score) in &result.alternatives {
62                println!("  - {}: {:.2}%", lang.name(), score * 100.0);
63            }
64        }
65    }
66
67    // Demonstrate stop words functionality
68    println!("\n\nStop Words Processing:");
69    println!("=====================");
70
71    let stop_words = StopWords::new();
72
73    let test_sentences = vec![
74        ("The cat is on the mat", Language::English),
75        ("Le chat est sur le tapis", Language::French),
76        ("El gato está en la alfombra", Language::Spanish),
77    ];
78
79    for (sentence, language) in &test_sentences {
80        let tokens: Vec<String> = sentence.split_whitespace().map(|s| s.to_string()).collect();
81
82        let filtered = stop_words.remove_stop_words(&tokens, *language);
83
84        println!("\nLanguage: {}", language.name());
85        println!("Original: {sentence}");
86        println!("Tokens: {tokens:?}");
87        println!("Without stop words: {filtered:?}");
88    }
89
90    // Demonstrate multilingual processor
91    println!("\n\nMultilingual Processor:");
92    println!("======================");
93
94    let processor = MultilingualProcessor::new();
95
96    let mixedtexts = vec![
97        "Machine learning algorithms are transforming artificial intelligence",
98        "Los algoritmos de aprendizaje automático están transformando la inteligencia artificial",
99        "Les algorithmes d'apprentissage automatique transforment l'intelligence artificielle",
100    ];
101
102    for text in &mixedtexts {
103        let result = processor.process(text)?;
104
105        println!("\nOriginal: \"{text}\"");
106        println!(
107            "Detected Language: {} (confidence: {:.2}%)",
108            result.language.name(),
109            result.confidence * 100.0
110        );
111        println!("Tokens: {} total", result.tokens.len());
112        println!(
113            "Filtered Tokens: {} after stop word removal",
114            result.filtered_tokens.len()
115        );
116
117        if result.filtered_tokens.len() <= 5 {
118            println!("Filtered: {:?}", result.filtered_tokens);
119        } else {
120            println!("First 5 filtered: {:?}...", &result.filtered_tokens[..5]);
121        }
122    }
123
124    // Language code conversions
125    println!("\n\nLanguage Code Conversions:");
126    println!("=========================");
127
128    let languages = vec![
129        Language::English,
130        Language::Spanish,
131        Language::French,
132        Language::German,
133        Language::Chinese,
134        Language::Japanese,
135    ];
136
137    for lang in &languages {
138        println!("{}: ISO code = {}", lang.name(), lang.iso_code());
139    }
140
141    println!("\nReverse lookup:");
142    let codes = vec!["en", "es", "fr", "de", "zh", "ja", "xx"];
143    for code in &codes {
144        let lang = Language::from_iso_code(code);
145        println!("{} -> {}", code, lang.name());
146    }
147
148    Ok(())
149}

Trait Implementations§

Source§

impl Default for MultilingualProcessor

Source§

fn default() -> Self

Returns the “default value” for a type. Read more

Auto Trait Implementations§

Blanket Implementations§

Source§

impl<T> Any for T
where T: 'static + ?Sized,

Source§

fn type_id(&self) -> TypeId

Gets the TypeId of self. Read more
Source§

impl<T> Borrow<T> for T
where T: ?Sized,

Source§

fn borrow(&self) -> &T

Immutably borrows from an owned value. Read more
Source§

impl<T> BorrowMut<T> for T
where T: ?Sized,

Source§

fn borrow_mut(&mut self) -> &mut T

Mutably borrows from an owned value. Read more
Source§

impl<T> From<T> for T

Source§

fn from(t: T) -> T

Returns the argument unchanged.

Source§

impl<T, U> Into<U> for T
where U: From<T>,

Source§

fn into(self) -> U

Calls U::from(self).

That is, this conversion is whatever the implementation of From<T> for U chooses to do.

Source§

impl<T> IntoEither for T

Source§

fn into_either(self, into_left: bool) -> Either<Self, Self>

Converts self into a Left variant of Either<Self, Self> if into_left is true. Converts self into a Right variant of Either<Self, Self> otherwise. Read more
Source§

fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
where F: FnOnce(&Self) -> bool,

Converts self into a Left variant of Either<Self, Self> if into_left(&self) returns true. Converts self into a Right variant of Either<Self, Self> otherwise. Read more
Source§

impl<T> Pointable for T

Source§

const ALIGN: usize

The alignment of pointer.
Source§

type Init = T

The type for initializers.
Source§

unsafe fn init(init: <T as Pointable>::Init) -> usize

Initializes an object with the given initializer. Read more
Source§

unsafe fn deref<'a>(ptr: usize) -> &'a T

Dereferences the given pointer. Read more
Source§

unsafe fn deref_mut<'a>(ptr: usize) -> &'a mut T

Mutably dereferences the given pointer. Read more
Source§

unsafe fn drop(ptr: usize)

Drops the object pointed to by the given pointer. Read more
Source§

impl<T> Same for T

Source§

type Output = T

Should always be Self
Source§

impl<SS, SP> SupersetOf<SS> for SP
where SS: SubsetOf<SP>,

Source§

fn to_subset(&self) -> Option<SS>

The inverse inclusion map: attempts to construct the equivalent element of the subset SS from self. Read more
Source§

fn is_in_subset(&self) -> bool

Checks if self is actually part of its subset SS (and can be converted to it).
Source§

fn to_subset_unchecked(&self) -> SS

Use with care! Same as self.to_subset but without any property checks. Always succeeds.
Source§

fn from_subset(element: &SS) -> SP

The inclusion map: converts the given subset element to the equivalent element of its superset.
Source§

impl<T, U> TryFrom<U> for T
where U: Into<T>,

Source§

type Error = Infallible

The type returned in the event of a conversion error.
Source§

fn try_from(value: U) -> Result<T, <T as TryFrom<U>>::Error>

Performs the conversion.
Source§

impl<T, U> TryInto<U> for T
where U: TryFrom<T>,

Source§

type Error = <U as TryFrom<T>>::Error

The type returned in the event of a conversion error.
Source§

fn try_into(self) -> Result<U, <U as TryFrom<T>>::Error>

Performs the conversion.
Source§

impl<V, T> VZip<V> for T
where V: MultiLane<T>,

Source§

fn vzip(self) -> V