DetectorFactory

Struct DetectorFactory 

Source
pub struct DetectorFactory {
    pub word_lang_prob_map: HashMap<String, Vec<f64>>,
    pub langlist: Vec<String>,
    pub seed: Option<u64>,
}
Expand description

Factory for creating language detectors with pre-loaded language profiles.

The DetectorFactory manages a collection of language profiles and provides methods to create Detector instances for language identification.

§Examples

use langdetect_rs::detector_factory::DetectorFactory;

// Create factory with built-in profiles
let factory = DetectorFactory::default().build();

// Create a detector
let detector = factory.create(None);

Fields§

§word_lang_prob_map: HashMap<String, Vec<f64>>

Word-to-language probability mapping for all loaded languages.

§langlist: Vec<String>

List of language identifiers, in the same order as the entries of each probability vector in word_lang_prob_map.

§seed: Option<u64>

Optional seed for reproducible randomization.

Implementations§

Source§

impl DetectorFactory

Source

pub fn new() -> DetectorFactoryBuilder

Creates a new DetectorFactory builder.

Use the builder pattern to configure the factory before calling build().

§Examples
use langdetect_rs::detector_factory::DetectorFactory;

let factory = DetectorFactory::new()
    .with_seed(Some(42))
    .build();
Examples found in repository?
examples/custom_profile/main.rs (line 7)
5fn main() {
6    // Create an empty factory
7    let mut factory = DetectorFactory::new().build();
8
9    // Load language profiles from the crate's profiles directory
10    let profiles_dir = Path::new("./").join("profiles");
11
12    println!("Read JSON profiles from {}", profiles_dir.display());
13
14    // Load Russian profile
15    let ru_json = LangProfileJson::new_from_file(profiles_dir.join("ru"));
16    match &ru_json {
17        Ok(_) => println!("\tRead Russian JSON profile"),
18        Err(e) => {
19            println!("Error reading Russian JSON profile: {:?}", e);
20            return;
21        }
22    }
23    let ru_profile = match LangProfile::from_json(ru_json.unwrap()) {
24        Ok(profile) => profile,
25        Err(e) => {
26            println!("Error creating Russian LangProfile: {}", e);
27            return;
28        }
29    };
30
31    // Load English profile
32    let en_json = LangProfileJson::new_from_file(profiles_dir.join("en"));
33    match &en_json {
34        Ok(_) => println!("\tRead English JSON profile"),
35        Err(e) => {
36            println!("Error reading English JSON profile: {:?}", e);
37            return;
38        }
39    }
40    let en_profile = match LangProfile::from_json(en_json.unwrap()) {
41        Ok(profile) => profile,
42        Err(e) => {
43            println!("Error creating English LangProfile: {}", e);
44            return;
45        }
46    };
47    
48    println!("Adding custom language profiles to the factory...");
49    // Add profiles to the factory
50    // Make sure to use correct language IDs as per your profiles
51    // And provide correct FINAL size of languages array
52    let final_size = 2; // Update this if you add more profiles
53    if let Err(e) = factory.add_profile(ru_profile, 0, final_size
54    ) {
55        println!("Error adding Russian profile: {:?}", e);
56        return;
57    }
58    println!("\tLoaded Russian profile");
59    if let Err(e) = factory.add_profile(en_profile, 1, final_size) {
60        println!("Error adding English profile: {:?}", e);
61        return;
62    }
63    println!("\tLoaded English profile");
64
65    println!("Factory loaded with {} languages: {:?}", factory.get_lang_list().len(), factory.get_lang_list());
66
67    println!("Testing language detection...");
68
69    // Test Russian text
70    match factory.detect("Привет, меня зовут Дима, и я разработчик", None) {
71        Ok(lang) => println!("\tRussian text detected as: {}", lang),
72        Err(e) => println!("Detection error: {:?}", e),
73    }
74
75    // Test English text
76    match factory.detect("Hello world! My name is Dima and I am a developer", None) {
77        Ok(lang) => println!("\tEnglish text detected as: {}", lang),
78        Err(e) => println!("Detection error: {:?}", e),
79    }
80
81    // Test French text (will be detected as the closest match from available languages)
82    // IMPORTANT: The algorithm normally returns the best guess from the loaded languages, even if the text's language is not loaded
83    // EXCEPTIONS: Returns error if no recognizable n-grams found, or "unknown" if all probabilities ≤ 0.1
84    // If you want to detect "unknown" languages, check probability thresholds or handle the error cases
85    match factory.detect("Bonjour tout le monde! Je m'appelle Dima et je suis développeur", None) {
86        Ok(lang) => println!("\tFrench text detected as: {} (closest match from ru/en)", lang),
87        Err(e) => println!("Detection error: {:?}", e),
88    }
89
90    // Show probabilities for the French text to see why it was classified as English
91    match factory.get_probabilities("Bonjour tout le monde! Je m'appelle Dima et je suis développeur", None) {
92        Ok(probs) => {
93            println!("\tFrench text probabilities:");
94            for lang in probs {
95                println!("\t\t{}: {:.3}", lang.lang.unwrap_or_default(), lang.prob);
96            }
97        }
98        Err(e) => println!("Probability error: {:?}", e),
99    }
100}
More examples
Hide additional examples
examples/extend_default/main.rs (line 40)
5fn main() {
6    // Load language profiles from the crate's profiles directory
7    let profiles_dir = DetectorFactory::get_default_profiles_path();
8
9    println!("Read all default JSON profiles from {}", profiles_dir.display());
10
11    let mut lang_profiles = vec![];
12    // Load every profile in the directory
13    for entry in std::fs::read_dir(&profiles_dir).unwrap() {
14        let entry = entry.unwrap();
15        let path = entry.path();
16        if !path.is_file() {
17            continue;
18        }
19        let lang_json = LangProfileJson::new_from_file(path);
20        match &lang_json {
21            Ok(_) => println!("\tRead {} JSON profile", entry.file_name().to_string_lossy()),
22            Err(e) => {
23                println!("Error reading {} JSON profile: {:?}", entry.file_name().to_string_lossy(), e);
24                return;
25            }
26        }
27        let lang_profile = match LangProfile::from_json(lang_json.unwrap()) {
28            Ok(profile) => profile,
29            Err(e) => {
30                println!("Error creating {} LangProfile: {}", entry.file_name().to_string_lossy(), e);
31                return;
32            }
33        };
34        lang_profiles.push(lang_profile);
35    }
36
37    println!("Adding all default languages profiles to the factory...");
38
39    // Create an EMPTY factory
40    let mut factory = DetectorFactory::new().build();
41
42    // Get number of profiles to set final size
43    let mut profile_count = lang_profiles.len();
44    // Since we know that we are going to add another profile which is not in the default set
45    // we increase the final size by 1
46    profile_count += 1;
47
48    println!("Final size (assuming we are going to extend default set) of languages array will be: {}", profile_count);
49    
50    for (i, profile) in lang_profiles.into_iter().enumerate() {
51        let profile_name = if let Some(name) = &profile.name {
52            name.clone()
53        } else {
54            "unknown".to_string()
55        };
56        println!("\tAdding profile: {} at index {}", profile_name, i);
57        if let Err(e) = factory.add_profile(profile, i, profile_count) {
58            println!("Error adding {} profile: {:?}", profile_name, e);
59            return;
60        }
61    }
62
63    // Load another profile (in documentation for generating profiles it is Sakha (Yakut) language - "sah")
64    let sah_path = Path::new("./scripts/datasets/generated").join("sah_generated.json");
65    let sah_json = LangProfileJson::new_from_file(sah_path);
66    match &sah_json {
67        Ok(_) => println!("Read Sakha JSON profile"),
68        Err(e) => {
69            println!("Error reading Sakha JSON profile: {:?}", e);
70            return;
71        }
72    }
73    let sah_profile = match LangProfile::from_json(sah_json.unwrap()) {
74        Ok(profile) => profile,
75        Err(e) => {
76            println!("Error creating Sakha LangProfile: {}", e);
77            return;
78        }
79    };
80    println!("Adding Sakha language profile to the factory");
81    if let Err(e) = factory.add_profile(sah_profile, profile_count - 1, profile_count) {
82        println!("Error adding Sakha profile: {:?}", e);
83        return;
84    }
85
86    println!("Testing language detection...");
87    // Test Russian text
88    match factory.detect("В своём глазу бревна не замечает, а в чужом соломинку видит", None) {
89        Ok(lang) => println!("\tRussian text detected as: {}", lang),
90        Err(e) => println!("Detection error: {:?}", e),
91    }
92
93    // Test English text
94    match factory.detect("He pays no attention to the plank in his own eye", None) {
95        Ok(lang) => println!("\tEnglish text detected as: {}", lang),
96        Err(e) => println!("Detection error: {:?}", e),
97    }
98
99    // Test Sakha (Yakut) text
100    match factory.detect("Айаҕыттан тахсар сытыканы билбэт.", None) {
101        Ok(lang) => println!("\tSakha text detected as: {}", lang),
102        Err(e) => println!("Detection error: {:?}", e),
103    }
104}
Source

pub fn default() -> DetectorFactoryBuilder

Creates a DetectorFactoryBuilder with all built-in language profiles loaded.

This method loads the 55 built-in language profiles from the crate’s profiles directory and returns a builder that can be further re-configured. The profiles are cached for performance.

§Example
use langdetect_rs::detector_factory::DetectorFactory;

let factory = DetectorFactory::default()
    .with_seed(Some(42))
    .build();
Examples found in repository?
examples/simple/main.rs (line 4)
3fn main() {
4    let factory = DetectorFactory::default().build();
5
6    // let mut detector = factory.create(None);
7    match factory.detect("War doesn't show who's right, just who's left.", None) {
8        Ok(lang) => println!("Detected language: {}", lang),
9        Err(e) => println!("Detection error: {:?}", e),
10    }
11
12    // let mut detector = factory.create(None);
13    match factory.detect("Ein, zwei, drei, vier", None) {
14        Ok(lang) => println!("Detected language: {}", lang),
15        Err(e) => println!("Detection error: {:?}", e),
16    }
17
18    match factory.get_probabilities("Otec matka syn.", None) {
19        Ok(probs) => println!("Language probabilities: {:?}", probs),
20        Err(e) => println!("Detection error: {:?}", e),
21    }
22
23    // For reproducibility use a fixed seed within explicitly defined detector
24    let mut detector = factory.create(None);
25    detector.seed = Some(42);
26    detector.append("Otec matka syn.");
27    match detector.get_probabilities() {
28        Ok(probs) => println!("Language probabilities with seed: {:?}", probs),
29        Err(e) => println!("Detection error: {:?}", e),
30    }
31
32    // Or you can set the seed for the factory itself and it will be inherited by detectors
33    let factory_with_seed = DetectorFactory::default()
34        .with_seed(Some(43))
35        .build();
36    match factory_with_seed.get_probabilities("Otec matka syn.", None) {
37        Ok(probs) => println!("Language probabilities with seed: {:?}", probs),
38        Err(e) => println!("Detection error: {:?}", e),
39    }
40}
Source

pub fn get_default_profiles_path() -> PathBuf

Returns the path to the default language profiles directory.

This method provides the path to the built-in language profile files that ship with the crate. End-users can use this path to load default profiles when extending or customizing the factory.

Note: This path is only accessible when the crate is used as a source dependency or when running from the crate’s directory. When used as a published dependency, the profiles may not be available as filesystem files.

§Returns

A PathBuf pointing to the default profiles directory.

§Example
use langdetect_rs::detector_factory::DetectorFactory;
use langdetect_rs::utils::lang_profile::{LangProfileJson, LangProfile};

// Get path to default profiles
let profiles_path = DetectorFactory::get_default_profiles_path();
println!("Default profiles are located at: {:?}", profiles_path);

// Load a specific profile
let en_profile = LangProfileJson::new_from_file(profiles_path.join("en")).unwrap();
let profile = LangProfile::from_json(en_profile).unwrap();

// Add to custom factory
let mut factory = DetectorFactory::new().build();
factory.add_profile(profile, 0, 1).unwrap();
Examples found in repository?
examples/extend_default/main.rs (line 7)
5fn main() {
6    // Load language profiles from the crate's profiles directory
7    let profiles_dir = DetectorFactory::get_default_profiles_path();
8
9    println!("Read all default JSON profiles from {}", profiles_dir.display());
10
11    let mut lang_profiles = vec![];
12    // Load every profile in the directory
13    for entry in std::fs::read_dir(&profiles_dir).unwrap() {
14        let entry = entry.unwrap();
15        let path = entry.path();
16        if !path.is_file() {
17            continue;
18        }
19        let lang_json = LangProfileJson::new_from_file(path);
20        match &lang_json {
21            Ok(_) => println!("\tRead {} JSON profile", entry.file_name().to_string_lossy()),
22            Err(e) => {
23                println!("Error reading {} JSON profile: {:?}", entry.file_name().to_string_lossy(), e);
24                return;
25            }
26        }
27        let lang_profile = match LangProfile::from_json(lang_json.unwrap()) {
28            Ok(profile) => profile,
29            Err(e) => {
30                println!("Error creating {} LangProfile: {}", entry.file_name().to_string_lossy(), e);
31                return;
32            }
33        };
34        lang_profiles.push(lang_profile);
35    }
36
37    println!("Adding all default languages profiles to the factory...");
38
39    // Create an EMPTY factory
40    let mut factory = DetectorFactory::new().build();
41
42    // Get number of profiles to set final size
43    let mut profile_count = lang_profiles.len();
44    // Since we know that we are going to add another profile which is not in the default set
45    // we increase the final size by 1
46    profile_count += 1;
47
48    println!("Final size (assuming we are going to extend default set) of languages array will be: {}", profile_count);
49    
50    for (i, profile) in lang_profiles.into_iter().enumerate() {
51        let profile_name = if let Some(name) = &profile.name {
52            name.clone()
53        } else {
54            "unknown".to_string()
55        };
56        println!("\tAdding profile: {} at index {}", profile_name, i);
57        if let Err(e) = factory.add_profile(profile, i, profile_count) {
58            println!("Error adding {} profile: {:?}", profile_name, e);
59            return;
60        }
61    }
62
63    // Load another profile (in documentation for generating profiles it is Sakha (Yakut) language - "sah")
64    let sah_path = Path::new("./scripts/datasets/generated").join("sah_generated.json");
65    let sah_json = LangProfileJson::new_from_file(sah_path);
66    match &sah_json {
67        Ok(_) => println!("Read Sakha JSON profile"),
68        Err(e) => {
69            println!("Error reading Sakha JSON profile: {:?}", e);
70            return;
71        }
72    }
73    let sah_profile = match LangProfile::from_json(sah_json.unwrap()) {
74        Ok(profile) => profile,
75        Err(e) => {
76            println!("Error creating Sakha LangProfile: {}", e);
77            return;
78        }
79    };
80    println!("Adding Sakha language profile to the factory");
81    if let Err(e) = factory.add_profile(sah_profile, profile_count - 1, profile_count) {
82        println!("Error adding Sakha profile: {:?}", e);
83        return;
84    }
85
86    println!("Testing language detection...");
87    // Test Russian text
88    match factory.detect("В своём глазу бревна не замечает, а в чужом соломинку видит", None) {
89        Ok(lang) => println!("\tRussian text detected as: {}", lang),
90        Err(e) => println!("Detection error: {:?}", e),
91    }
92
93    // Test English text
94    match factory.detect("He pays no attention to the plank in his own eye", None) {
95        Ok(lang) => println!("\tEnglish text detected as: {}", lang),
96        Err(e) => println!("Detection error: {:?}", e),
97    }
98
99    // Test Sakha (Yakut) text
100    match factory.detect("Айаҕыттан тахсар сытыканы билбэт.", None) {
101        Ok(lang) => println!("\tSakha text detected as: {}", lang),
102        Err(e) => println!("Detection error: {:?}", e),
103    }
104}
Source

pub fn clear(&mut self)

Clears all loaded language profiles and mappings.

Source

pub fn set_seed(&mut self, seed: u64)

Sets the randomization seed for reproducible results.

§Arguments
  • seed - The seed value to use for randomization.
Source

pub fn get_lang_list(&self) -> Vec<String>

Returns a list of all loaded language identifiers.

§Returns

A vector of language codes in the order they were loaded. The codes are typically ISO 639-1, but custom profiles may use other identifiers (for example, sah).

Examples found in repository?
examples/custom_profile/main.rs (line 65)
5fn main() {
6    // Create an empty factory
7    let mut factory = DetectorFactory::new().build();
8
9    // Load language profiles from the crate's profiles directory
10    let profiles_dir = Path::new("./").join("profiles");
11
12    println!("Read JSON profiles from {}", profiles_dir.display());
13
14    // Load Russian profile
15    let ru_json = LangProfileJson::new_from_file(profiles_dir.join("ru"));
16    match &ru_json {
17        Ok(_) => println!("\tRead Russian JSON profile"),
18        Err(e) => {
19            println!("Error reading Russian JSON profile: {:?}", e);
20            return;
21        }
22    }
23    let ru_profile = match LangProfile::from_json(ru_json.unwrap()) {
24        Ok(profile) => profile,
25        Err(e) => {
26            println!("Error creating Russian LangProfile: {}", e);
27            return;
28        }
29    };
30
31    // Load English profile
32    let en_json = LangProfileJson::new_from_file(profiles_dir.join("en"));
33    match &en_json {
34        Ok(_) => println!("\tRead English JSON profile"),
35        Err(e) => {
36            println!("Error reading English JSON profile: {:?}", e);
37            return;
38        }
39    }
40    let en_profile = match LangProfile::from_json(en_json.unwrap()) {
41        Ok(profile) => profile,
42        Err(e) => {
43            println!("Error creating English LangProfile: {}", e);
44            return;
45        }
46    };
47    
48    println!("Adding custom language profiles to the factory...");
49    // Add profiles to the factory
50    // Make sure to use correct language IDs as per your profiles
51    // And provide correct FINAL size of languages array
52    let final_size = 2; // Update this if you add more profiles
53    if let Err(e) = factory.add_profile(ru_profile, 0, final_size
54    ) {
55        println!("Error adding Russian profile: {:?}", e);
56        return;
57    }
58    println!("\tLoaded Russian profile");
59    if let Err(e) = factory.add_profile(en_profile, 1, final_size) {
60        println!("Error adding English profile: {:?}", e);
61        return;
62    }
63    println!("\tLoaded English profile");
64
65    println!("Factory loaded with {} languages: {:?}", factory.get_lang_list().len(), factory.get_lang_list());
66
67    println!("Testing language detection...");
68
69    // Test Russian text
70    match factory.detect("Привет, меня зовут Дима, и я разработчик", None) {
71        Ok(lang) => println!("\tRussian text detected as: {}", lang),
72        Err(e) => println!("Detection error: {:?}", e),
73    }
74
75    // Test English text
76    match factory.detect("Hello world! My name is Dima and I am a developer", None) {
77        Ok(lang) => println!("\tEnglish text detected as: {}", lang),
78        Err(e) => println!("Detection error: {:?}", e),
79    }
80
81    // Test French text (will be detected as the closest match from available languages)
82    // IMPORTANT: The algorithm normally returns the best guess from the loaded languages, even if the text's language is not loaded
83    // EXCEPTIONS: Returns error if no recognizable n-grams found, or "unknown" if all probabilities ≤ 0.1
84    // If you want to detect "unknown" languages, check probability thresholds or handle the error cases
85    match factory.detect("Bonjour tout le monde! Je m'appelle Dima et je suis développeur", None) {
86        Ok(lang) => println!("\tFrench text detected as: {} (closest match from ru/en)", lang),
87        Err(e) => println!("Detection error: {:?}", e),
88    }
89
90    // Show probabilities for the French text to see why it was classified as English
91    match factory.get_probabilities("Bonjour tout le monde! Je m'appelle Dima et je suis développeur", None) {
92        Ok(probs) => {
93            println!("\tFrench text probabilities:");
94            for lang in probs {
95                println!("\t\t{}: {:.3}", lang.lang.unwrap_or_default(), lang.prob);
96            }
97        }
98        Err(e) => println!("Probability error: {:?}", e),
99    }
100}
Source

pub fn create(&self, alpha: Option<f64>) -> Detector

Creates a new Detector instance with the current profiles.

§Arguments
  • alpha - Optional alpha smoothing parameter (default: 0.5).
§Returns

A configured Detector ready for language detection.

Examples found in repository?
examples/simple/main.rs (line 24)
3fn main() {
4    let factory = DetectorFactory::default().build();
5
6    // let mut detector = factory.create(None);
7    match factory.detect("War doesn't show who's right, just who's left.", None) {
8        Ok(lang) => println!("Detected language: {}", lang),
9        Err(e) => println!("Detection error: {:?}", e),
10    }
11
12    // let mut detector = factory.create(None);
13    match factory.detect("Ein, zwei, drei, vier", None) {
14        Ok(lang) => println!("Detected language: {}", lang),
15        Err(e) => println!("Detection error: {:?}", e),
16    }
17
18    match factory.get_probabilities("Otec matka syn.", None) {
19        Ok(probs) => println!("Language probabilities: {:?}", probs),
20        Err(e) => println!("Detection error: {:?}", e),
21    }
22
23    // For reproducibility use a fixed seed within explicitly defined detector
24    let mut detector = factory.create(None);
25    detector.seed = Some(42);
26    detector.append("Otec matka syn.");
27    match detector.get_probabilities() {
28        Ok(probs) => println!("Language probabilities with seed: {:?}", probs),
29        Err(e) => println!("Detection error: {:?}", e),
30    }
31
32    // Or you can set the seed for the factory itself and it will be inherited by detectors
33    let factory_with_seed = DetectorFactory::default()
34        .with_seed(Some(43))
35        .build();
36    match factory_with_seed.get_probabilities("Otec matka syn.", None) {
37        Ok(probs) => println!("Language probabilities with seed: {:?}", probs),
38        Err(e) => println!("Detection error: {:?}", e),
39    }
40}
Source

pub fn override_profile( &mut self, profile: LangProfile, index: usize, langsize: usize, ) -> Result<(), DetectorFactoryError>

Overrides an existing language profile at the specified index.

This is an internal method used during profile loading.

§Arguments
  • profile - The language profile that replaces the existing one.
  • index - The index in the language list.
  • langsize - Total number of languages.
Source

pub fn add_profile( &mut self, profile: LangProfile, index: usize, langsize: usize, ) -> Result<(), DetectorFactoryError>

Adds a new language profile to the factory.

§Arguments
  • profile - The language profile to add.
  • index - The index position for this language.
  • langsize - Total number of languages in the profile set.
§Errors

Returns DetectorFactoryError::DuplicatedLanguage if the language already exists.

Examples found in repository?
examples/custom_profile/main.rs (lines 53-54)
5fn main() {
6    // Create an empty factory
7    let mut factory = DetectorFactory::new().build();
8
9    // Load language profiles from the crate's profiles directory
10    let profiles_dir = Path::new("./").join("profiles");
11
12    println!("Read JSON profiles from {}", profiles_dir.display());
13
14    // Load Russian profile
15    let ru_json = LangProfileJson::new_from_file(profiles_dir.join("ru"));
16    match &ru_json {
17        Ok(_) => println!("\tRead Russian JSON profile"),
18        Err(e) => {
19            println!("Error reading Russian JSON profile: {:?}", e);
20            return;
21        }
22    }
23    let ru_profile = match LangProfile::from_json(ru_json.unwrap()) {
24        Ok(profile) => profile,
25        Err(e) => {
26            println!("Error creating Russian LangProfile: {}", e);
27            return;
28        }
29    };
30
31    // Load English profile
32    let en_json = LangProfileJson::new_from_file(profiles_dir.join("en"));
33    match &en_json {
34        Ok(_) => println!("\tRead English JSON profile"),
35        Err(e) => {
36            println!("Error reading English JSON profile: {:?}", e);
37            return;
38        }
39    }
40    let en_profile = match LangProfile::from_json(en_json.unwrap()) {
41        Ok(profile) => profile,
42        Err(e) => {
43            println!("Error creating English LangProfile: {}", e);
44            return;
45        }
46    };
47    
48    println!("Adding custom language profiles to the factory...");
49    // Add profiles to the factory
50    // Make sure to use correct language IDs as per your profiles
51    // And provide correct FINAL size of languages array
52    let final_size = 2; // Update this if you add more profiles
53    if let Err(e) = factory.add_profile(ru_profile, 0, final_size
54    ) {
55        println!("Error adding Russian profile: {:?}", e);
56        return;
57    }
58    println!("\tLoaded Russian profile");
59    if let Err(e) = factory.add_profile(en_profile, 1, final_size) {
60        println!("Error adding English profile: {:?}", e);
61        return;
62    }
63    println!("\tLoaded English profile");
64
65    println!("Factory loaded with {} languages: {:?}", factory.get_lang_list().len(), factory.get_lang_list());
66
67    println!("Testing language detection...");
68
69    // Test Russian text
70    match factory.detect("Привет, меня зовут Дима, и я разработчик", None) {
71        Ok(lang) => println!("\tRussian text detected as: {}", lang),
72        Err(e) => println!("Detection error: {:?}", e),
73    }
74
75    // Test English text
76    match factory.detect("Hello world! My name is Dima and I am a developer", None) {
77        Ok(lang) => println!("\tEnglish text detected as: {}", lang),
78        Err(e) => println!("Detection error: {:?}", e),
79    }
80
81    // Test French text (will be detected as the closest match from available languages)
82    // IMPORTANT: The algorithm normally returns the best guess from the loaded languages, even if the text's language is not loaded
83    // EXCEPTIONS: Returns error if no recognizable n-grams found, or "unknown" if all probabilities ≤ 0.1
84    // If you want to detect "unknown" languages, check probability thresholds or handle the error cases
85    match factory.detect("Bonjour tout le monde! Je m'appelle Dima et je suis développeur", None) {
86        Ok(lang) => println!("\tFrench text detected as: {} (closest match from ru/en)", lang),
87        Err(e) => println!("Detection error: {:?}", e),
88    }
89
90    // Show probabilities for the French text to see why it was classified as English
91    match factory.get_probabilities("Bonjour tout le monde! Je m'appelle Dima et je suis développeur", None) {
92        Ok(probs) => {
93            println!("\tFrench text probabilities:");
94            for lang in probs {
95                println!("\t\t{}: {:.3}", lang.lang.unwrap_or_default(), lang.prob);
96            }
97        }
98        Err(e) => println!("Probability error: {:?}", e),
99    }
100}
More examples
Hide additional examples
examples/extend_default/main.rs (line 57)
5fn main() {
6    // Load language profiles from the crate's profiles directory
7    let profiles_dir = DetectorFactory::get_default_profiles_path();
8
9    println!("Read all default JSON profiles from {}", profiles_dir.display());
10
11    let mut lang_profiles = vec![];
12    // Load every profile in the directory
13    for entry in std::fs::read_dir(&profiles_dir).unwrap() {
14        let entry = entry.unwrap();
15        let path = entry.path();
16        if !path.is_file() {
17            continue;
18        }
19        let lang_json = LangProfileJson::new_from_file(path);
20        match &lang_json {
21            Ok(_) => println!("\tRead {} JSON profile", entry.file_name().to_string_lossy()),
22            Err(e) => {
23                println!("Error reading {} JSON profile: {:?}", entry.file_name().to_string_lossy(), e);
24                return;
25            }
26        }
27        let lang_profile = match LangProfile::from_json(lang_json.unwrap()) {
28            Ok(profile) => profile,
29            Err(e) => {
30                println!("Error creating {} LangProfile: {}", entry.file_name().to_string_lossy(), e);
31                return;
32            }
33        };
34        lang_profiles.push(lang_profile);
35    }
36
37    println!("Adding all default languages profiles to the factory...");
38
39    // Create an EMPTY factory
40    let mut factory = DetectorFactory::new().build();
41
42    // Get number of profiles to set final size
43    let mut profile_count = lang_profiles.len();
44    // Since we know that we are going to add another profile which is not in the default set
45    // we increase the final size by 1
46    profile_count += 1;
47
48    println!("Final size (assuming we are going to extend default set) of languages array will be: {}", profile_count);
49    
50    for (i, profile) in lang_profiles.into_iter().enumerate() {
51        let profile_name = if let Some(name) = &profile.name {
52            name.clone()
53        } else {
54            "unknown".to_string()
55        };
56        println!("\tAdding profile: {} at index {}", profile_name, i);
57        if let Err(e) = factory.add_profile(profile, i, profile_count) {
58            println!("Error adding {} profile: {:?}", profile_name, e);
59            return;
60        }
61    }
62
63    // Load another profile (in documentation for generating profiles it is Sakha (Yakut) language - "sah")
64    let sah_path = Path::new("./scripts/datasets/generated").join("sah_generated.json");
65    let sah_json = LangProfileJson::new_from_file(sah_path);
66    match &sah_json {
67        Ok(_) => println!("Read Sakha JSON profile"),
68        Err(e) => {
69            println!("Error reading Sakha JSON profile: {:?}", e);
70            return;
71        }
72    }
73    let sah_profile = match LangProfile::from_json(sah_json.unwrap()) {
74        Ok(profile) => profile,
75        Err(e) => {
76            println!("Error creating Sakha LangProfile: {}", e);
77            return;
78        }
79    };
80    println!("Adding Sakha language profile to the factory");
81    if let Err(e) = factory.add_profile(sah_profile, profile_count - 1, profile_count) {
82        println!("Error adding Sakha profile: {:?}", e);
83        return;
84    }
85
86    println!("Testing language detection...");
87    // Test Russian text
88    match factory.detect("В своём глазу бревна не замечает, а в чужом соломинку видит", None) {
89        Ok(lang) => println!("\tRussian text detected as: {}", lang),
90        Err(e) => println!("Detection error: {:?}", e),
91    }
92
93    // Test English text
94    match factory.detect("He pays no attention to the plank in his own eye", None) {
95        Ok(lang) => println!("\tEnglish text detected as: {}", lang),
96        Err(e) => println!("Detection error: {:?}", e),
97    }
98
99    // Test Sakha (Yakut) text
100    match factory.detect("Айаҕыттан тахсар сытыканы билбэт.", None) {
101        Ok(lang) => println!("\tSakha text detected as: {}", lang),
102        Err(e) => println!("Detection error: {:?}", e),
103    }
104}
Source

pub fn delete_profile(&mut self, lang: &str) -> Result<(), DetectorFactoryError>

Removes a language profile from the factory.

§Arguments
  • lang - The language code to remove.
§Errors

Returns DetectorFactoryError::DuplicatedLanguage if the language doesn’t exist.

Source

pub fn load_json_profile( &mut self, json_profiles: &[&str], ) -> Result<(), DetectorFactoryError>

Loads language profiles from JSON strings.

§Arguments
  • json_profiles - Array of JSON strings representing language profiles.
§Errors

Returns DetectorFactoryError::NotEnoughProfiles if fewer than 2 profiles are provided.

Source

pub fn detect( &self, text: &str, alpha: Option<f64>, ) -> Result<String, DetectorError>

Shortcut method to detect language from text in one call.

§Arguments
  • text - The text to analyze.
  • alpha - Optional alpha smoothing parameter.
§Returns

The detected language code or an error.

§Example
use langdetect_rs::detector_factory::DetectorFactory;

let factory = DetectorFactory::default().build();
let result = factory.detect("Hello world!", None);
Examples found in repository?
examples/simple/main.rs (line 7)
3fn main() {
4    let factory = DetectorFactory::default().build();
5
6    // let mut detector = factory.create(None);
7    match factory.detect("War doesn't show who's right, just who's left.", None) {
8        Ok(lang) => println!("Detected language: {}", lang),
9        Err(e) => println!("Detection error: {:?}", e),
10    }
11
12    // let mut detector = factory.create(None);
13    match factory.detect("Ein, zwei, drei, vier", None) {
14        Ok(lang) => println!("Detected language: {}", lang),
15        Err(e) => println!("Detection error: {:?}", e),
16    }
17
18    match factory.get_probabilities("Otec matka syn.", None) {
19        Ok(probs) => println!("Language probabilities: {:?}", probs),
20        Err(e) => println!("Detection error: {:?}", e),
21    }
22
23    // For reproducibility use a fixed seed within explicitly defined detector
24    let mut detector = factory.create(None);
25    detector.seed = Some(42);
26    detector.append("Otec matka syn.");
27    match detector.get_probabilities() {
28        Ok(probs) => println!("Language probabilities with seed: {:?}", probs),
29        Err(e) => println!("Detection error: {:?}", e),
30    }
31
32    // Or you can set the seed for the factory itself and it will be inherited by detectors
33    let factory_with_seed = DetectorFactory::default()
34        .with_seed(Some(43))
35        .build();
36    match factory_with_seed.get_probabilities("Otec matka syn.", None) {
37        Ok(probs) => println!("Language probabilities with seed: {:?}", probs),
38        Err(e) => println!("Detection error: {:?}", e),
39    }
40}
More examples
Hide additional examples
examples/custom_profile/main.rs (line 70)
5fn main() {
6    // Create an empty factory
7    let mut factory = DetectorFactory::new().build();
8
9    // Load language profiles from the crate's profiles directory
10    let profiles_dir = Path::new("./").join("profiles");
11
12    println!("Read JSON profiles from {}", profiles_dir.display());
13
14    // Load Russian profile
15    let ru_json = LangProfileJson::new_from_file(profiles_dir.join("ru"));
16    match &ru_json {
17        Ok(_) => println!("\tRead Russian JSON profile"),
18        Err(e) => {
19            println!("Error reading Russian JSON profile: {:?}", e);
20            return;
21        }
22    }
23    let ru_profile = match LangProfile::from_json(ru_json.unwrap()) {
24        Ok(profile) => profile,
25        Err(e) => {
26            println!("Error creating Russian LangProfile: {}", e);
27            return;
28        }
29    };
30
31    // Load English profile
32    let en_json = LangProfileJson::new_from_file(profiles_dir.join("en"));
33    match &en_json {
34        Ok(_) => println!("\tRead English JSON profile"),
35        Err(e) => {
36            println!("Error reading English JSON profile: {:?}", e);
37            return;
38        }
39    }
40    let en_profile = match LangProfile::from_json(en_json.unwrap()) {
41        Ok(profile) => profile,
42        Err(e) => {
43            println!("Error creating English LangProfile: {}", e);
44            return;
45        }
46    };
47    
48    println!("Adding custom language profiles to the factory...");
49    // Add profiles to the factory
50    // Make sure to use correct language IDs as per your profiles
51    // And provide correct FINAL size of languages array
52    let final_size = 2; // Update this if you add more profiles
53    if let Err(e) = factory.add_profile(ru_profile, 0, final_size
54    ) {
55        println!("Error adding Russian profile: {:?}", e);
56        return;
57    }
58    println!("\tLoaded Russian profile");
59    if let Err(e) = factory.add_profile(en_profile, 1, final_size) {
60        println!("Error adding English profile: {:?}", e);
61        return;
62    }
63    println!("\tLoaded English profile");
64
65    println!("Factory loaded with {} languages: {:?}", factory.get_lang_list().len(), factory.get_lang_list());
66
67    println!("Testing language detection...");
68
69    // Test Russian text
70    match factory.detect("Привет, меня зовут Дима, и я разработчик", None) {
71        Ok(lang) => println!("\tRussian text detected as: {}", lang),
72        Err(e) => println!("Detection error: {:?}", e),
73    }
74
75    // Test English text
76    match factory.detect("Hello world! My name is Dima and I am a developer", None) {
77        Ok(lang) => println!("\tEnglish text detected as: {}", lang),
78        Err(e) => println!("Detection error: {:?}", e),
79    }
80
81    // Test French text (will be detected as the closest match from available languages)
82    // IMPORTANT: The algorithm always returns the best guess from loaded languages, never fails
83    // EXCEPTIONS: Returns error if no recognizable n-grams found, or "unknown" if all probabilities ≤ 0.1
84    // If you want to detect "unknown" languages, check probability thresholds or handle the error cases
85    match factory.detect("Bonjour tout le monde! Je m'appelle Dima et je suis développeur", None) {
86        Ok(lang) => println!("\tFrench text detected as: {} (closest match from ru/en)", lang),
87        Err(e) => println!("Detection error: {:?}", e),
88    }
89
90    // Show probabilities for the French text to see why it was classified as English
91    match factory.get_probabilities("Bonjour tout le monde! Je m'appelle Dima et je suis développeur", None) {
92        Ok(probs) => {
93            println!("\tFrench text probabilities:");
94            for lang in probs {
95                println!("\t\t{}: {:.3}", lang.lang.unwrap_or_default(), lang.prob);
96            }
97        }
98        Err(e) => println!("Probability error: {:?}", e),
99    }
100}
examples/extend_default/main.rs (line 88)
5fn main() {
6    // Load language profiles from the crate's profiles directory
7    let profiles_dir = DetectorFactory::get_default_profiles_path();
8
9    println!("Read all default JSON profiles from {}", profiles_dir.display());
10
11    let mut lang_profiles = vec![];
12    // Load every profile in the directory
13    for entry in std::fs::read_dir(&profiles_dir).unwrap() {
14        let entry = entry.unwrap();
15        let path = entry.path();
16        if !path.is_file() {
17            continue;
18        }
19        let lang_json = LangProfileJson::new_from_file(path);
20        match &lang_json {
21            Ok(_) => println!("\tRead {} JSON profile", entry.file_name().to_string_lossy()),
22            Err(e) => {
23                println!("Error reading {} JSON profile: {:?}", entry.file_name().to_string_lossy(), e);
24                return;
25            }
26        }
27        let lang_profile = match LangProfile::from_json(lang_json.unwrap()) {
28            Ok(profile) => profile,
29            Err(e) => {
30                println!("Error creating {} LangProfile: {}", entry.file_name().to_string_lossy(), e);
31                return;
32            }
33        };
34        lang_profiles.push(lang_profile);
35    }
36
37    println!("Adding all default languages profiles to the factory...");
38
39    // Create an EMPTY factory
40    let mut factory = DetectorFactory::new().build();
41
42    // Get number of profiles to set final size
43    let mut profile_count = lang_profiles.len();
44    // Since we know that we are going to add another profile which is not in the default set
45    // we increase the final size by 1
46    profile_count += 1;
47
48    println!("Final size (assuming we are going to extend default set) of languages array will be: {}", profile_count);
49    
50    for (i, profile) in lang_profiles.into_iter().enumerate() {
51        let profile_name = if let Some(name) = &profile.name {
52            name.clone()
53        } else {
54            "unknown".to_string()
55        };
56        println!("\tAdding profile: {} at index {}", profile_name, i);
57        if let Err(e) = factory.add_profile(profile, i, profile_count) {
58            println!("Error adding {} profile: {:?}", profile_name, e);
59            return;
60        }
61    }
62
63    // Load another profile (in documentation for generating profiles it is Sakha (Yakut) language - "sah")
64    let sah_path = Path::new("./scripts/datasets/generated").join("sah_generated.json");
65    let sah_json = LangProfileJson::new_from_file(sah_path);
66    match &sah_json {
67        Ok(_) => println!("Read Sakha JSON profile"),
68        Err(e) => {
69            println!("Error reading Sakha JSON profile: {:?}", e);
70            return;
71        }
72    }
73    let sah_profile = match LangProfile::from_json(sah_json.unwrap()) {
74        Ok(profile) => profile,
75        Err(e) => {
76            println!("Error creating Sakha LangProfile: {}", e);
77            return;
78        }
79    };
80    println!("Adding Sakha language profile to the factory");
81    if let Err(e) = factory.add_profile(sah_profile, profile_count - 1, profile_count) {
82        println!("Error adding Sakha profile: {:?}", e);
83        return;
84    }
85
86    println!("Testing language detection...");
87    // Test Russian text
88    match factory.detect("В своём глазу бревна не замечает, а в чужом соломинку видит", None) {
89        Ok(lang) => println!("\tRussian text detected as: {}", lang),
90        Err(e) => println!("Detection error: {:?}", e),
91    }
92
93    // Test English text
94    match factory.detect("He pays no attention to the plank in his own eye", None) {
95        Ok(lang) => println!("\tEnglish text detected as: {}", lang),
96        Err(e) => println!("Detection error: {:?}", e),
97    }
98
99    // Test Sakha (Yakut) text
100    match factory.detect("Айаҕыттан тахсар сытыканы билбэт.", None) {
101        Ok(lang) => println!("\tSakha text detected as: {}", lang),
102        Err(e) => println!("Detection error: {:?}", e),
103    }
104}
Source

pub fn get_probabilities( &self, text: &str, alpha: Option<f64>, ) -> Result<Vec<Language>, DetectorError>

Shortcut method to get language probabilities from text in one call.

§Arguments
  • text - The text to analyze.
  • alpha - Optional alpha smoothing parameter.
§Returns

A vector of languages with their probabilities, sorted by probability descending.

§Example
use langdetect_rs::detector_factory::DetectorFactory;

let factory = DetectorFactory::default().build();
let result = factory.get_probabilities("Hello world!", None);
Examples found in repository?
examples/simple/main.rs (line 18)
3fn main() {
4    let factory = DetectorFactory::default().build();
5
6    // let mut detector = factory.create(None);
7    match factory.detect("War doesn't show who's right, just who's left.", None) {
8        Ok(lang) => println!("Detected language: {}", lang),
9        Err(e) => println!("Detection error: {:?}", e),
10    }
11
12    // let mut detector = factory.create(None);
13    match factory.detect("Ein, zwei, drei, vier", None) {
14        Ok(lang) => println!("Detected language: {}", lang),
15        Err(e) => println!("Detection error: {:?}", e),
16    }
17
18    match factory.get_probabilities("Otec matka syn.", None) {
19        Ok(probs) => println!("Language probabilities: {:?}", probs),
20        Err(e) => println!("Detection error: {:?}", e),
21    }
22
23    // For reproducibility use a fixed seed within explicitly defined detector
24    let mut detector = factory.create(None);
25    detector.seed = Some(42);
26    detector.append("Otec matka syn.");
27    match detector.get_probabilities() {
28        Ok(probs) => println!("Language probabilities with seed: {:?}", probs),
29        Err(e) => println!("Detection error: {:?}", e),
30    }
31
32    // Or you can set the seed for the factory itself and it will be inherited by detectors
33    let factory_with_seed = DetectorFactory::default()
34        .with_seed(Some(43))
35        .build();
36    match factory_with_seed.get_probabilities("Otec matka syn.", None) {
37        Ok(probs) => println!("Language probabilities with seed: {:?}", probs),
38        Err(e) => println!("Detection error: {:?}", e),
39    }
40}
More examples
Hide additional examples
examples/custom_profile/main.rs (line 91)
5fn main() {
6    // Create an empty factory
7    let mut factory = DetectorFactory::new().build();
8
9    // Load language profiles from the crate's profiles directory
10    let profiles_dir = Path::new("./").join("profiles");
11
12    println!("Read JSON profiles from {}", profiles_dir.display());
13
14    // Load Russian profile
15    let ru_json = LangProfileJson::new_from_file(profiles_dir.join("ru"));
16    match &ru_json {
17        Ok(_) => println!("\tRead Russian JSON profile"),
18        Err(e) => {
19            println!("Error reading Russian JSON profile: {:?}", e);
20            return;
21        }
22    }
23    let ru_profile = match LangProfile::from_json(ru_json.unwrap()) {
24        Ok(profile) => profile,
25        Err(e) => {
26            println!("Error creating Russian LangProfile: {}", e);
27            return;
28        }
29    };
30
31    // Load English profile
32    let en_json = LangProfileJson::new_from_file(profiles_dir.join("en"));
33    match &en_json {
34        Ok(_) => println!("\tRead English JSON profile"),
35        Err(e) => {
36            println!("Error reading English JSON profile: {:?}", e);
37            return;
38        }
39    }
40    let en_profile = match LangProfile::from_json(en_json.unwrap()) {
41        Ok(profile) => profile,
42        Err(e) => {
43            println!("Error creating English LangProfile: {}", e);
44            return;
45        }
46    };
47    
48    println!("Adding custom language profiles to the factory...");
49    // Add profiles to the factory
50    // Make sure to use correct language IDs as per your profiles
51    // And provide correct FINAL size of languages array
52    let final_size = 2; // Update this if you add more profiles
53    if let Err(e) = factory.add_profile(ru_profile, 0, final_size
54    ) {
55        println!("Error adding Russian profile: {:?}", e);
56        return;
57    }
58    println!("\tLoaded Russian profile");
59    if let Err(e) = factory.add_profile(en_profile, 1, final_size) {
60        println!("Error adding English profile: {:?}", e);
61        return;
62    }
63    println!("\tLoaded English profile");
64
65    println!("Factory loaded with {} languages: {:?}", factory.get_lang_list().len(), factory.get_lang_list());
66
67    println!("Testing language detection...");
68
69    // Test Russian text
70    match factory.detect("Привет, меня зовут Дима, и я разработчик", None) {
71        Ok(lang) => println!("\tRussian text detected as: {}", lang),
72        Err(e) => println!("Detection error: {:?}", e),
73    }
74
75    // Test English text
76    match factory.detect("Hello world! My name is Dima and I am a developer", None) {
77        Ok(lang) => println!("\tEnglish text detected as: {}", lang),
78        Err(e) => println!("Detection error: {:?}", e),
79    }
80
81    // Test French text (will be detected as the closest match from available languages)
82    // IMPORTANT: The algorithm always returns the best guess from loaded languages, never fails
83    // EXCEPTIONS: Returns error if no recognizable n-grams found, or "unknown" if all probabilities ≤ 0.1
84    // If you want to detect "unknown" languages, check probability thresholds or handle the error cases
85    match factory.detect("Bonjour tout le monde! Je m'appelle Dima et je suis développeur", None) {
86        Ok(lang) => println!("\tFrench text detected as: {} (closest match from ru/en)", lang),
87        Err(e) => println!("Detection error: {:?}", e),
88    }
89
90    // Show probabilities for the French text to see why it was classified as English
91    match factory.get_probabilities("Bonjour tout le monde! Je m'appelle Dima et je suis développeur", None) {
92        Ok(probs) => {
93            println!("\tFrench text probabilities:");
94            for lang in probs {
95                println!("\t\t{}: {:.3}", lang.lang.unwrap_or_default(), lang.prob);
96            }
97        }
98        Err(e) => println!("Probability error: {:?}", e),
99    }
100}
Source

pub fn load_profile<P: AsRef<Path>>( &mut self, profile_directory: P, ) -> Result<(), String>

Loads all language profiles from a directory of JSON files.

§Arguments
  • profile_directory - Path to directory containing JSON profile files.
§Returns

Ok(()) on success, or an error string on failure.

§Example
use langdetect_rs::detector_factory::DetectorFactory;

let mut factory = DetectorFactory::new().build();
factory.load_profile("profiles/").unwrap();

Trait Implementations§

Source§

impl Clone for DetectorFactory

Source§

fn clone(&self) -> DetectorFactory

Returns a duplicate of the value. Read more
1.0.0 · Source§

fn clone_from(&mut self, source: &Self)

Performs copy-assignment from source. Read more

Auto Trait Implementations§

Blanket Implementations§

Source§

impl<T> Any for T
where T: 'static + ?Sized,

Source§

fn type_id(&self) -> TypeId

Gets the TypeId of self. Read more
Source§

impl<T> Borrow<T> for T
where T: ?Sized,

Source§

fn borrow(&self) -> &T

Immutably borrows from an owned value. Read more
Source§

impl<T> BorrowMut<T> for T
where T: ?Sized,

Source§

fn borrow_mut(&mut self) -> &mut T

Mutably borrows from an owned value. Read more
Source§

impl<T> CloneToUninit for T
where T: Clone,

Source§

unsafe fn clone_to_uninit(&self, dest: *mut u8)

🔬This is a nightly-only experimental API. (clone_to_uninit)
Performs copy-assignment from self to dest. Read more
Source§

impl<T> From<T> for T

Source§

fn from(t: T) -> T

Returns the argument unchanged.

Source§

impl<T, U> Into<U> for T
where U: From<T>,

Source§

fn into(self) -> U

Calls U::from(self).

That is, this conversion is whatever the implementation of From<T> for U chooses to do.

Source§

impl<T> ToOwned for T
where T: Clone,

Source§

type Owned = T

The resulting type after obtaining ownership.
Source§

fn to_owned(&self) -> T

Creates owned data from borrowed data, usually by cloning. Read more
Source§

fn clone_into(&self, target: &mut T)

Uses borrowed data to replace owned data, usually by cloning. Read more
Source§

impl<T, U> TryFrom<U> for T
where U: Into<T>,

Source§

type Error = Infallible

The type returned in the event of a conversion error.
Source§

fn try_from(value: U) -> Result<T, <T as TryFrom<U>>::Error>

Performs the conversion.
Source§

impl<T, U> TryInto<U> for T
where U: TryFrom<T>,

Source§

type Error = <U as TryFrom<T>>::Error

The type returned in the event of a conversion error.
Source§

fn try_into(self) -> Result<U, <U as TryFrom<T>>::Error>

Performs the conversion.
Source§

impl<V, T> VZip<V> for T
where V: MultiLane<T>,

Source§

fn vzip(self) -> V