pub struct DetectorFactory {
pub word_lang_prob_map: HashMap<String, Vec<f64>>,
pub langlist: Vec<String>,
pub seed: Option<u64>,
}
Factory for creating language detectors with pre-loaded language profiles.
The DetectorFactory manages a collection of language profiles and provides methods to create Detector instances for language identification.
§Examples
use langdetect_rs::detector_factory::DetectorFactory;
// Create factory with built-in profiles
let factory = DetectorFactory::default().build();
// Create a detector
let detector = factory.create(None);
§Fields
word_lang_prob_map: HashMap<String, Vec<f64>> - Word-to-language probability mapping for all loaded languages.
langlist: Vec<String> - List of language identifiers in the same order as probability vectors.
seed: Option<u64> - Optional seed for reproducible randomization.
§Implementations
impl DetectorFactory
pub fn new() -> DetectorFactoryBuilder
Creates a new DetectorFactory builder.
Use the builder pattern to configure the factory before calling build().
§Examples
use langdetect_rs::detector_factory::DetectorFactory;
let factory = DetectorFactory::new()
.with_seed(Some(42))
.build();
Examples found in repository?
5 fn main() {
6 // Create an empty factory
7 let mut factory = DetectorFactory::new().build();
8
9 // Load language profiles from the crate's profiles directory
10 let profiles_dir = Path::new("./").join("profiles");
11
12 println!("Read JSON profiles from {}", profiles_dir.display());
13
14 // Load Russian profile
15 let ru_json = LangProfileJson::new_from_file(profiles_dir.join("ru"));
16 match &ru_json {
17 Ok(_) => println!("\tRead Russian JSON profile"),
18 Err(e) => {
19 println!("Error reading Russian JSON profile: {:?}", e);
20 return;
21 }
22 }
23 let ru_profile = match LangProfile::from_json(ru_json.unwrap()) {
24 Ok(profile) => profile,
25 Err(e) => {
26 println!("Error creating Russian LangProfile: {}", e);
27 return;
28 }
29 };
30
31 // Load English profile
32 let en_json = LangProfileJson::new_from_file(profiles_dir.join("en"));
33 match &en_json {
34 Ok(_) => println!("\tRead English JSON profile"),
35 Err(e) => {
36 println!("Error reading English JSON profile: {:?}", e);
37 return;
38 }
39 }
40 let en_profile = match LangProfile::from_json(en_json.unwrap()) {
41 Ok(profile) => profile,
42 Err(e) => {
43 println!("Error creating English LangProfile: {}", e);
44 return;
45 }
46 };
47
48 println!("Adding custom language profiles to the factory...");
49 // Add profiles to the factory
50 // Make sure to use correct language IDs as per your profiles
51 // And provide correct FINAL size of languages array
52 let final_size = 2; // Update this if you add more profiles
53 if let Err(e) = factory.add_profile(ru_profile, 0, final_size
54 ) {
55 println!("Error adding Russian profile: {:?}", e);
56 return;
57 }
58 println!("\tLoaded Russian profile");
59 if let Err(e) = factory.add_profile(en_profile, 1, final_size) {
60 println!("Error adding English profile: {:?}", e);
61 return;
62 }
63 println!("\tLoaded English profile");
64
65 println!("Factory loaded with {} languages: {:?}", factory.get_lang_list().len(), factory.get_lang_list());
66
67 println!("Testing language detection...");
68
69 // Test Russian text
70 match factory.detect("Привет, меня зовут Дима, и я разработчик", None) {
71 Ok(lang) => println!("\tRussian text detected as: {}", lang),
72 Err(e) => println!("Detection error: {:?}", e),
73 }
74
75 // Test English text
76 match factory.detect("Hello world! My name is Dima and I am a developer", None) {
77 Ok(lang) => println!("\tEnglish text detected as: {}", lang),
78 Err(e) => println!("Detection error: {:?}", e),
79 }
80
81 // Test French text (will be detected as the closest match from available languages)
82 // IMPORTANT: The algorithm always returns the best guess from loaded languages, never fails
83 // EXCEPTIONS: Returns error if no recognizable n-grams found, or "unknown" if all probabilities ≤ 0.1
84 // If you want to detect "unknown" languages, check probability thresholds or handle the error cases
85 match factory.detect("Bonjour tout le monde! Je m'appelle Dima et je suis développeur", None) {
86 Ok(lang) => println!("\tFrench text detected as: {} (closest match from ru/en)", lang),
87 Err(e) => println!("Detection error: {:?}", e),
88 }
89
90 // Show probabilities for the French text to see why it was classified as English
91 match factory.get_probabilities("Bonjour tout le monde! Je m'appelle Dima et je suis développeur", None) {
92 Ok(probs) => {
93 println!("\tFrench text probabilities:");
94 for lang in probs {
95 println!("\t\t{}: {:.3}", lang.lang.unwrap_or_default(), lang.prob);
96 }
97 }
98 Err(e) => println!("Probability error: {:?}", e),
99 }
100 }
More examples
5 fn main() {
6 // Load language profiles from the crate's profiles directory
7 let profiles_dir = DetectorFactory::get_default_profiles_path();
8
9 println!("Read all default JSON profiles from {}", profiles_dir.display());
10
11 let mut lang_profiles = vec![];
12 // Load every profile in the directory
13 for entry in std::fs::read_dir(&profiles_dir).unwrap() {
14 let entry = entry.unwrap();
15 let path = entry.path();
16 if !path.is_file() {
17 continue;
18 }
19 let lang_json = LangProfileJson::new_from_file(path);
20 match &lang_json {
21 Ok(_) => println!("\tRead {} JSON profile", entry.file_name().to_string_lossy()),
22 Err(e) => {
23 println!("Error reading {} JSON profile: {:?}", entry.file_name().to_string_lossy(), e);
24 return;
25 }
26 }
27 let lang_profile = match LangProfile::from_json(lang_json.unwrap()) {
28 Ok(profile) => profile,
29 Err(e) => {
30 println!("Error creating {} LangProfile: {}", entry.file_name().to_string_lossy(), e);
31 return;
32 }
33 };
34 lang_profiles.push(lang_profile);
35 }
36
37 println!("Adding all default languages profiles to the factory...");
38
39 // Create an EMPTY factory
40 let mut factory = DetectorFactory::new().build();
41
42 // Get number of profiles to set final size
43 let mut profile_count = lang_profiles.len();
44 // Since we know that we are going to add another profile which is not in the default set
45 // we increase the final size by 1
46 profile_count += 1;
47
48 println!("Final size (assuming we are going to extend default set) of languages array will be: {}", profile_count);
49
50 for (i, profile) in lang_profiles.into_iter().enumerate() {
51 let profile_name = if let Some(name) = &profile.name {
52 name.clone()
53 } else {
54 "unknown".to_string()
55 };
56 println!("\tAdding profile: {} at index {}", profile_name, i);
57 if let Err(e) = factory.add_profile(profile, i, profile_count) {
58 println!("Error adding {} profile: {:?}", profile_name, e);
59 return;
60 }
61 }
62
63 // Load another profile (in documentation for generating profiles it is Sakha (Yakut) language - "sah")
64 let sah_path = Path::new("./scripts/datasets/generated").join("sah_generated.json");
65 let sah_json = LangProfileJson::new_from_file(sah_path);
66 match &sah_json {
67 Ok(_) => println!("Read Sakha JSON profile"),
68 Err(e) => {
69 println!("Error reading Sakha JSON profile: {:?}", e);
70 return;
71 }
72 }
73 let sah_profile = match LangProfile::from_json(sah_json.unwrap()) {
74 Ok(profile) => profile,
75 Err(e) => {
76 println!("Error creating Sakha LangProfile: {}", e);
77 return;
78 }
79 };
80 println!("Adding Sakha language profile to the factory");
81 if let Err(e) = factory.add_profile(sah_profile, profile_count - 1, profile_count) {
82 println!("Error adding Sakha profile: {:?}", e);
83 return;
84 }
85
86 println!("Testing language detection...");
87 // Test Russian text
88 match factory.detect("В своём глазу бревна не замечает, а в чужом соломинку видит", None) {
89 Ok(lang) => println!("\tRussian text detected as: {}", lang),
90 Err(e) => println!("Detection error: {:?}", e),
91 }
92
93 // Test English text
94 match factory.detect("He pays no attention to the plank in his own eye", None) {
95 Ok(lang) => println!("\tEnglish text detected as: {}", lang),
96 Err(e) => println!("Detection error: {:?}", e),
97 }
98
99 // Test Sakha (Yakut) text
100 match factory.detect("Айаҕыттан тахсар сытыканы билбэт.", None) {
101 Ok(lang) => println!("\tSakha text detected as: {}", lang),
102 Err(e) => println!("Detection error: {:?}", e),
103 }
104 }
pub fn default() -> DetectorFactoryBuilder
Creates a DetectorFactoryBuilder with all built-in language profiles loaded.
This method loads the 55 built-in language profiles from the crate’s profiles directory and returns a builder that can be further re-configured. The profiles are cached for performance.
§Example
use langdetect_rs::detector_factory::DetectorFactory;
let factory = DetectorFactory::default()
.with_seed(Some(42))
.build();
Examples found in repository?
3 fn main() {
4 let factory = DetectorFactory::default().build();
5
6 // let mut detector = factory.create(None);
7 match factory.detect("War doesn't show who's right, just who's left.", None) {
8 Ok(lang) => println!("Detected language: {}", lang),
9 Err(e) => println!("Detection error: {:?}", e),
10 }
11
12 // let mut detector = factory.create(None);
13 match factory.detect("Ein, zwei, drei, vier", None) {
14 Ok(lang) => println!("Detected language: {}", lang),
15 Err(e) => println!("Detection error: {:?}", e),
16 }
17
18 match factory.get_probabilities("Otec matka syn.", None) {
19 Ok(probs) => println!("Language probabilities: {:?}", probs),
20 Err(e) => println!("Detection error: {:?}", e),
21 }
22
23 // For reproducibility use a fixed seed within explicitly defined detector
24 let mut detector = factory.create(None);
25 detector.seed = Some(42);
26 detector.append("Otec matka syn.");
27 match detector.get_probabilities() {
28 Ok(probs) => println!("Language probabilities with seed: {:?}", probs),
29 Err(e) => println!("Detection error: {:?}", e),
30 }
31
32 // Or you can set the seed for the factory itself and it will be inherited by detectors
33 let factory_with_seed = DetectorFactory::default()
34 .with_seed(Some(43))
35 .build();
36 match factory_with_seed.get_probabilities("Otec matka syn.", None) {
37 Ok(probs) => println!("Language probabilities with seed: {:?}", probs),
38 Err(e) => println!("Detection error: {:?}", e),
39 }
40 }
pub fn get_default_profiles_path() -> PathBuf
Returns the path to the default language profiles directory.
This method provides the path to the built-in language profile files that ship with the crate. End-users can use this path to load default profiles when extending or customizing the factory.
Note: This path is only accessible when the crate is used as a source dependency or when running from the crate’s directory. When used as a published dependency, the profiles may not be available as filesystem files.
§Returns
A PathBuf pointing to the default profiles directory.
§Example
use langdetect_rs::detector_factory::DetectorFactory;
use langdetect_rs::utils::lang_profile::{LangProfileJson, LangProfile};
// Get path to default profiles
let profiles_path = DetectorFactory::get_default_profiles_path();
println!("Default profiles are located at: {:?}", profiles_path);
// Load a specific profile
let en_profile = LangProfileJson::new_from_file(profiles_path.join("en")).unwrap();
let profile = LangProfile::from_json(en_profile).unwrap();
// Add to custom factory
let mut factory = DetectorFactory::new().build();
factory.add_profile(profile, 0, 1).unwrap();
Examples found in repository?
5 fn main() {
6 // Load language profiles from the crate's profiles directory
7 let profiles_dir = DetectorFactory::get_default_profiles_path();
8
9 println!("Read all default JSON profiles from {}", profiles_dir.display());
10
11 let mut lang_profiles = vec![];
12 // Load every profile in the directory
13 for entry in std::fs::read_dir(&profiles_dir).unwrap() {
14 let entry = entry.unwrap();
15 let path = entry.path();
16 if !path.is_file() {
17 continue;
18 }
19 let lang_json = LangProfileJson::new_from_file(path);
20 match &lang_json {
21 Ok(_) => println!("\tRead {} JSON profile", entry.file_name().to_string_lossy()),
22 Err(e) => {
23 println!("Error reading {} JSON profile: {:?}", entry.file_name().to_string_lossy(), e);
24 return;
25 }
26 }
27 let lang_profile = match LangProfile::from_json(lang_json.unwrap()) {
28 Ok(profile) => profile,
29 Err(e) => {
30 println!("Error creating {} LangProfile: {}", entry.file_name().to_string_lossy(), e);
31 return;
32 }
33 };
34 lang_profiles.push(lang_profile);
35 }
36
37 println!("Adding all default languages profiles to the factory...");
38
39 // Create an EMPTY factory
40 let mut factory = DetectorFactory::new().build();
41
42 // Get number of profiles to set final size
43 let mut profile_count = lang_profiles.len();
44 // Since we know that we are going to add another profile which is not in the default set
45 // we increase the final size by 1
46 profile_count += 1;
47
48 println!("Final size (assuming we are going to extend default set) of languages array will be: {}", profile_count);
49
50 for (i, profile) in lang_profiles.into_iter().enumerate() {
51 let profile_name = if let Some(name) = &profile.name {
52 name.clone()
53 } else {
54 "unknown".to_string()
55 };
56 println!("\tAdding profile: {} at index {}", profile_name, i);
57 if let Err(e) = factory.add_profile(profile, i, profile_count) {
58 println!("Error adding {} profile: {:?}", profile_name, e);
59 return;
60 }
61 }
62
63 // Load another profile (in documentation for generating profiles it is Sakha (Yakut) language - "sah")
64 let sah_path = Path::new("./scripts/datasets/generated").join("sah_generated.json");
65 let sah_json = LangProfileJson::new_from_file(sah_path);
66 match &sah_json {
67 Ok(_) => println!("Read Sakha JSON profile"),
68 Err(e) => {
69 println!("Error reading Sakha JSON profile: {:?}", e);
70 return;
71 }
72 }
73 let sah_profile = match LangProfile::from_json(sah_json.unwrap()) {
74 Ok(profile) => profile,
75 Err(e) => {
76 println!("Error creating Sakha LangProfile: {}", e);
77 return;
78 }
79 };
80 println!("Adding Sakha language profile to the factory");
81 if let Err(e) = factory.add_profile(sah_profile, profile_count - 1, profile_count) {
82 println!("Error adding Sakha profile: {:?}", e);
83 return;
84 }
85
86 println!("Testing language detection...");
87 // Test Russian text
88 match factory.detect("В своём глазу бревна не замечает, а в чужом соломинку видит", None) {
89 Ok(lang) => println!("\tRussian text detected as: {}", lang),
90 Err(e) => println!("Detection error: {:?}", e),
91 }
92
93 // Test English text
94 match factory.detect("He pays no attention to the plank in his own eye", None) {
95 Ok(lang) => println!("\tEnglish text detected as: {}", lang),
96 Err(e) => println!("Detection error: {:?}", e),
97 }
98
99 // Test Sakha (Yakut) text
100 match factory.detect("Айаҕыттан тахсар сытыканы билбэт.", None) {
101 Ok(lang) => println!("\tSakha text detected as: {}", lang),
102 Err(e) => println!("Detection error: {:?}", e),
103 }
104 }
pub fn set_seed(&mut self, seed: u64)
Sets the randomization seed for reproducible results.
§Arguments
seed - The seed value to use for randomization.
pub fn get_lang_list(&self) -> Vec<String>
Returns a list of all loaded language identifiers.
§Returns
A vector of language codes (ISO 639-1) in the order they were loaded.
Examples found in repository?
5 fn main() {
6 // Create an empty factory
7 let mut factory = DetectorFactory::new().build();
8
9 // Load language profiles from the crate's profiles directory
10 let profiles_dir = Path::new("./").join("profiles");
11
12 println!("Read JSON profiles from {}", profiles_dir.display());
13
14 // Load Russian profile
15 let ru_json = LangProfileJson::new_from_file(profiles_dir.join("ru"));
16 match &ru_json {
17 Ok(_) => println!("\tRead Russian JSON profile"),
18 Err(e) => {
19 println!("Error reading Russian JSON profile: {:?}", e);
20 return;
21 }
22 }
23 let ru_profile = match LangProfile::from_json(ru_json.unwrap()) {
24 Ok(profile) => profile,
25 Err(e) => {
26 println!("Error creating Russian LangProfile: {}", e);
27 return;
28 }
29 };
30
31 // Load English profile
32 let en_json = LangProfileJson::new_from_file(profiles_dir.join("en"));
33 match &en_json {
34 Ok(_) => println!("\tRead English JSON profile"),
35 Err(e) => {
36 println!("Error reading English JSON profile: {:?}", e);
37 return;
38 }
39 }
40 let en_profile = match LangProfile::from_json(en_json.unwrap()) {
41 Ok(profile) => profile,
42 Err(e) => {
43 println!("Error creating English LangProfile: {}", e);
44 return;
45 }
46 };
47
48 println!("Adding custom language profiles to the factory...");
49 // Add profiles to the factory
50 // Make sure to use correct language IDs as per your profiles
51 // And provide correct FINAL size of languages array
52 let final_size = 2; // Update this if you add more profiles
53 if let Err(e) = factory.add_profile(ru_profile, 0, final_size
54 ) {
55 println!("Error adding Russian profile: {:?}", e);
56 return;
57 }
58 println!("\tLoaded Russian profile");
59 if let Err(e) = factory.add_profile(en_profile, 1, final_size) {
60 println!("Error adding English profile: {:?}", e);
61 return;
62 }
63 println!("\tLoaded English profile");
64
65 println!("Factory loaded with {} languages: {:?}", factory.get_lang_list().len(), factory.get_lang_list());
66
67 println!("Testing language detection...");
68
69 // Test Russian text
70 match factory.detect("Привет, меня зовут Дима, и я разработчик", None) {
71 Ok(lang) => println!("\tRussian text detected as: {}", lang),
72 Err(e) => println!("Detection error: {:?}", e),
73 }
74
75 // Test English text
76 match factory.detect("Hello world! My name is Dima and I am a developer", None) {
77 Ok(lang) => println!("\tEnglish text detected as: {}", lang),
78 Err(e) => println!("Detection error: {:?}", e),
79 }
80
81 // Test French text (will be detected as the closest match from available languages)
82 // IMPORTANT: The algorithm always returns the best guess from loaded languages, never fails
83 // EXCEPTIONS: Returns error if no recognizable n-grams found, or "unknown" if all probabilities ≤ 0.1
84 // If you want to detect "unknown" languages, check probability thresholds or handle the error cases
85 match factory.detect("Bonjour tout le monde! Je m'appelle Dima et je suis développeur", None) {
86 Ok(lang) => println!("\tFrench text detected as: {} (closest match from ru/en)", lang),
87 Err(e) => println!("Detection error: {:?}", e),
88 }
89
90 // Show probabilities for the French text to see why it was classified as English
91 match factory.get_probabilities("Bonjour tout le monde! Je m'appelle Dima et je suis développeur", None) {
92 Ok(probs) => {
93 println!("\tFrench text probabilities:");
94 for lang in probs {
95 println!("\t\t{}: {:.3}", lang.lang.unwrap_or_default(), lang.prob);
96 }
97 }
98 Err(e) => println!("Probability error: {:?}", e),
99 }
100 }
pub fn create(&self, alpha: Option<f64>) -> Detector
Creates a new Detector instance with the current profiles.
§Arguments
alpha - Optional alpha smoothing parameter (default: 0.5).
§Returns
A configured Detector ready for language detection.
Examples found in repository?
3 fn main() {
4 let factory = DetectorFactory::default().build();
5
6 // let mut detector = factory.create(None);
7 match factory.detect("War doesn't show who's right, just who's left.", None) {
8 Ok(lang) => println!("Detected language: {}", lang),
9 Err(e) => println!("Detection error: {:?}", e),
10 }
11
12 // let mut detector = factory.create(None);
13 match factory.detect("Ein, zwei, drei, vier", None) {
14 Ok(lang) => println!("Detected language: {}", lang),
15 Err(e) => println!("Detection error: {:?}", e),
16 }
17
18 match factory.get_probabilities("Otec matka syn.", None) {
19 Ok(probs) => println!("Language probabilities: {:?}", probs),
20 Err(e) => println!("Detection error: {:?}", e),
21 }
22
23 // For reproducibility use a fixed seed within explicitly defined detector
24 let mut detector = factory.create(None);
25 detector.seed = Some(42);
26 detector.append("Otec matka syn.");
27 match detector.get_probabilities() {
28 Ok(probs) => println!("Language probabilities with seed: {:?}", probs),
29 Err(e) => println!("Detection error: {:?}", e),
30 }
31
32 // Or you can set the seed for the factory itself and it will be inherited by detectors
33 let factory_with_seed = DetectorFactory::default()
34 .with_seed(Some(43))
35 .build();
36 match factory_with_seed.get_probabilities("Otec matka syn.", None) {
37 Ok(probs) => println!("Language probabilities with seed: {:?}", probs),
38 Err(e) => println!("Detection error: {:?}", e),
39 }
40 }
pub fn override_profile(&mut self, profile: LangProfile, index: usize, langsize: usize) -> Result<(), DetectorFactoryError>
Overrides an existing language profile at the specified index.
This is an internal method used during profile loading.
§Arguments
profile - The language profile to add.
index - The index in the language list.
langsize - Total number of languages.
pub fn add_profile(&mut self, profile: LangProfile, index: usize, langsize: usize) -> Result<(), DetectorFactoryError>
Adds a new language profile to the factory.
§Arguments
profile - The language profile to add.
index - The index position for this language.
langsize - Total number of languages in the profile set.
§Errors
Returns DetectorFactoryError::DuplicatedLanguage if the language already exists.
Examples found in repository?
5 fn main() {
6 // Create an empty factory
7 let mut factory = DetectorFactory::new().build();
8
9 // Load language profiles from the crate's profiles directory
10 let profiles_dir = Path::new("./").join("profiles");
11
12 println!("Read JSON profiles from {}", profiles_dir.display());
13
14 // Load Russian profile
15 let ru_json = LangProfileJson::new_from_file(profiles_dir.join("ru"));
16 match &ru_json {
17 Ok(_) => println!("\tRead Russian JSON profile"),
18 Err(e) => {
19 println!("Error reading Russian JSON profile: {:?}", e);
20 return;
21 }
22 }
23 let ru_profile = match LangProfile::from_json(ru_json.unwrap()) {
24 Ok(profile) => profile,
25 Err(e) => {
26 println!("Error creating Russian LangProfile: {}", e);
27 return;
28 }
29 };
30
31 // Load English profile
32 let en_json = LangProfileJson::new_from_file(profiles_dir.join("en"));
33 match &en_json {
34 Ok(_) => println!("\tRead English JSON profile"),
35 Err(e) => {
36 println!("Error reading English JSON profile: {:?}", e);
37 return;
38 }
39 }
40 let en_profile = match LangProfile::from_json(en_json.unwrap()) {
41 Ok(profile) => profile,
42 Err(e) => {
43 println!("Error creating English LangProfile: {}", e);
44 return;
45 }
46 };
47
48 println!("Adding custom language profiles to the factory...");
49 // Add profiles to the factory
50 // Make sure to use correct language IDs as per your profiles
51 // And provide correct FINAL size of languages array
52 let final_size = 2; // Update this if you add more profiles
53 if let Err(e) = factory.add_profile(ru_profile, 0, final_size
54 ) {
55 println!("Error adding Russian profile: {:?}", e);
56 return;
57 }
58 println!("\tLoaded Russian profile");
59 if let Err(e) = factory.add_profile(en_profile, 1, final_size) {
60 println!("Error adding English profile: {:?}", e);
61 return;
62 }
63 println!("\tLoaded English profile");
64
65 println!("Factory loaded with {} languages: {:?}", factory.get_lang_list().len(), factory.get_lang_list());
66
67 println!("Testing language detection...");
68
69 // Test Russian text
70 match factory.detect("Привет, меня зовут Дима, и я разработчик", None) {
71 Ok(lang) => println!("\tRussian text detected as: {}", lang),
72 Err(e) => println!("Detection error: {:?}", e),
73 }
74
75 // Test English text
76 match factory.detect("Hello world! My name is Dima and I am a developer", None) {
77 Ok(lang) => println!("\tEnglish text detected as: {}", lang),
78 Err(e) => println!("Detection error: {:?}", e),
79 }
80
81 // Test French text (will be detected as the closest match from available languages)
82 // IMPORTANT: The algorithm always returns the best guess from loaded languages, never fails
83 // EXCEPTIONS: Returns error if no recognizable n-grams found, or "unknown" if all probabilities ≤ 0.1
84 // If you want to detect "unknown" languages, check probability thresholds or handle the error cases
85 match factory.detect("Bonjour tout le monde! Je m'appelle Dima et je suis développeur", None) {
86 Ok(lang) => println!("\tFrench text detected as: {} (closest match from ru/en)", lang),
87 Err(e) => println!("Detection error: {:?}", e),
88 }
89
90 // Show probabilities for the French text to see why it was classified as English
91 match factory.get_probabilities("Bonjour tout le monde! Je m'appelle Dima et je suis développeur", None) {
92 Ok(probs) => {
93 println!("\tFrench text probabilities:");
94 for lang in probs {
95 println!("\t\t{}: {:.3}", lang.lang.unwrap_or_default(), lang.prob);
96 }
97 }
98 Err(e) => println!("Probability error: {:?}", e),
99 }
100 }
More examples
5 fn main() {
6 // Load language profiles from the crate's profiles directory
7 let profiles_dir = DetectorFactory::get_default_profiles_path();
8
9 println!("Read all default JSON profiles from {}", profiles_dir.display());
10
11 let mut lang_profiles = vec![];
12 // Load every profile in the directory
13 for entry in std::fs::read_dir(&profiles_dir).unwrap() {
14 let entry = entry.unwrap();
15 let path = entry.path();
16 if !path.is_file() {
17 continue;
18 }
19 let lang_json = LangProfileJson::new_from_file(path);
20 match &lang_json {
21 Ok(_) => println!("\tRead {} JSON profile", entry.file_name().to_string_lossy()),
22 Err(e) => {
23 println!("Error reading {} JSON profile: {:?}", entry.file_name().to_string_lossy(), e);
24 return;
25 }
26 }
27 let lang_profile = match LangProfile::from_json(lang_json.unwrap()) {
28 Ok(profile) => profile,
29 Err(e) => {
30 println!("Error creating {} LangProfile: {}", entry.file_name().to_string_lossy(), e);
31 return;
32 }
33 };
34 lang_profiles.push(lang_profile);
35 }
36
37 println!("Adding all default languages profiles to the factory...");
38
39 // Create an EMPTY factory
40 let mut factory = DetectorFactory::new().build();
41
42 // Get number of profiles to set final size
43 let mut profile_count = lang_profiles.len();
44 // Since we know that we are going to add another profile which is not in the default set
45 // we increase the final size by 1
46 profile_count += 1;
47
48 println!("Final size (assuming we are going to extend default set) of languages array will be: {}", profile_count);
49
50 for (i, profile) in lang_profiles.into_iter().enumerate() {
51 let profile_name = if let Some(name) = &profile.name {
52 name.clone()
53 } else {
54 "unknown".to_string()
55 };
56 println!("\tAdding profile: {} at index {}", profile_name, i);
57 if let Err(e) = factory.add_profile(profile, i, profile_count) {
58 println!("Error adding {} profile: {:?}", profile_name, e);
59 return;
60 }
61 }
62
63 // Load another profile (in documentation for generating profiles it is Sakha (Yakut) language - "sah")
64 let sah_path = Path::new("./scripts/datasets/generated").join("sah_generated.json");
65 let sah_json = LangProfileJson::new_from_file(sah_path);
66 match &sah_json {
67 Ok(_) => println!("Read Sakha JSON profile"),
68 Err(e) => {
69 println!("Error reading Sakha JSON profile: {:?}", e);
70 return;
71 }
72 }
73 let sah_profile = match LangProfile::from_json(sah_json.unwrap()) {
74 Ok(profile) => profile,
75 Err(e) => {
76 println!("Error creating Sakha LangProfile: {}", e);
77 return;
78 }
79 };
80 println!("Adding Sakha language profile to the factory");
81 if let Err(e) = factory.add_profile(sah_profile, profile_count - 1, profile_count) {
82 println!("Error adding Sakha profile: {:?}", e);
83 return;
84 }
85
86 println!("Testing language detection...");
87 // Test Russian text
88 match factory.detect("В своём глазу бревна не замечает, а в чужом соломинку видит", None) {
89 Ok(lang) => println!("\tRussian text detected as: {}", lang),
90 Err(e) => println!("Detection error: {:?}", e),
91 }
92
93 // Test English text
94 match factory.detect("He pays no attention to the plank in his own eye", None) {
95 Ok(lang) => println!("\tEnglish text detected as: {}", lang),
96 Err(e) => println!("Detection error: {:?}", e),
97 }
98
99 // Test Sakha (Yakut) text
100 match factory.detect("Айаҕыттан тахсар сытыканы билбэт.", None) {
101 Ok(lang) => println!("\tSakha text detected as: {}", lang),
102 Err(e) => println!("Detection error: {:?}", e),
103 }
104 }
pub fn delete_profile(&mut self, lang: &str) -> Result<(), DetectorFactoryError>
pub fn load_json_profile(&mut self, json_profiles: &[&str]) -> Result<(), DetectorFactoryError>
pub fn detect(&self, text: &str, alpha: Option<f64>) -> Result<String, DetectorError>
Shortcut method to detect language from text in one call.
§Arguments
text - The text to analyze.
alpha - Optional alpha smoothing parameter.
§Returns
The detected language code or an error.
§Example
use langdetect_rs::detector_factory::DetectorFactory;
let factory = DetectorFactory::default().build();
let result = factory.detect("Hello world!", None);
Examples found in repository?
3fn main() {
4 let factory = DetectorFactory::default().build();
5
6 // let mut detector = factory.create(None);
7 match factory.detect("War doesn't show who's right, just who's left.", None) {
8 Ok(lang) => println!("Detected language: {}", lang),
9 Err(e) => println!("Detection error: {:?}", e),
10 }
11
12 // let mut detector = factory.create(None);
13 match factory.detect("Ein, zwei, drei, vier", None) {
14 Ok(lang) => println!("Detected language: {}", lang),
15 Err(e) => println!("Detection error: {:?}", e),
16 }
17
18 match factory.get_probabilities("Otec matka syn.", None) {
19 Ok(probs) => println!("Language probabilities: {:?}", probs),
20 Err(e) => println!("Detection error: {:?}", e),
21 }
22
23 // For reproducibility use a fixed seed within explicitly defined detector
24 let mut detector = factory.create(None);
25 detector.seed = Some(42);
26 detector.append("Otec matka syn.");
27 match detector.get_probabilities() {
28 Ok(probs) => println!("Language probabilities with seed: {:?}", probs),
29 Err(e) => println!("Detection error: {:?}", e),
30 }
31
32 // Or you can set the seed for the factory itself and it will be inherited by detectors
33 let factory_with_seed = DetectorFactory::default()
34 .with_seed(Some(43))
35 .build();
36 match factory_with_seed.get_probabilities("Otec matka syn.", None) {
37 Ok(probs) => println!("Language probabilities with seed: {:?}", probs),
38 Err(e) => println!("Detection error: {:?}", e),
39 }
40}
More examples
5fn main() {
6 // Create an empty factory
7 let mut factory = DetectorFactory::new().build();
8
9 // Load language profiles from the crate's profiles directory
10 let profiles_dir = Path::new("./").join("profiles");
11
12 println!("Read JSON profiles from {}", profiles_dir.display());
13
14 // Load Russian profile
15 let ru_json = LangProfileJson::new_from_file(profiles_dir.join("ru"));
16 match &ru_json {
17 Ok(_) => println!("\tRead Russian JSON profile"),
18 Err(e) => {
19 println!("Error reading Russian JSON profile: {:?}", e);
20 return;
21 }
22 }
23 let ru_profile = match LangProfile::from_json(ru_json.unwrap()) {
24 Ok(profile) => profile,
25 Err(e) => {
26 println!("Error creating Russian LangProfile: {}", e);
27 return;
28 }
29 };
30
31 // Load English profile
32 let en_json = LangProfileJson::new_from_file(profiles_dir.join("en"));
33 match &en_json {
34 Ok(_) => println!("\tRead English JSON profile"),
35 Err(e) => {
36 println!("Error reading English JSON profile: {:?}", e);
37 return;
38 }
39 }
40 let en_profile = match LangProfile::from_json(en_json.unwrap()) {
41 Ok(profile) => profile,
42 Err(e) => {
43 println!("Error creating English LangProfile: {}", e);
44 return;
45 }
46 };
47
48 println!("Adding custom language profiles to the factory...");
49 // Add profiles to the factory
50 // Make sure to use correct language IDs as per your profiles
51 // And provide correct FINAL size of languages array
52 let final_size = 2; // Update this if you add more profiles
53 if let Err(e) = factory.add_profile(ru_profile, 0, final_size
54 ) {
55 println!("Error adding Russian profile: {:?}", e);
56 return;
57 }
58 println!("\tLoaded Russian profile");
59 if let Err(e) = factory.add_profile(en_profile, 1, final_size) {
60 println!("Error adding English profile: {:?}", e);
61 return;
62 }
63 println!("\tLoaded English profile");
64
65 println!("Factory loaded with {} languages: {:?}", factory.get_lang_list().len(), factory.get_lang_list());
66
67 println!("Testing language detection...");
68
69 // Test Russian text
70 match factory.detect("Привет, меня зовут Дима, и я разработчик", None) {
71 Ok(lang) => println!("\tRussian text detected as: {}", lang),
72 Err(e) => println!("Detection error: {:?}", e),
73 }
74
75 // Test English text
76 match factory.detect("Hello world! My name is Dima and I am a developer", None) {
77 Ok(lang) => println!("\tEnglish text detected as: {}", lang),
78 Err(e) => println!("Detection error: {:?}", e),
79 }
80
81 // Test French text (will be detected as the closest match from available languages)
82 // IMPORTANT: The algorithm always returns the best guess from loaded languages, never fails
83 // EXCEPTIONS: Returns error if no recognizable n-grams found, or "unknown" if all probabilities ≤ 0.1
84 // If you want to detect "unknown" languages, check probability thresholds or handle the error cases
85 match factory.detect("Bonjour tout le monde! Je m'appelle Dima et je suis développeur", None) {
86 Ok(lang) => println!("\tFrench text detected as: {} (closest match from ru/en)", lang),
87 Err(e) => println!("Detection error: {:?}", e),
88 }
89
90 // Show probabilities for the French text to see why it was classified as English
91 match factory.get_probabilities("Bonjour tout le monde! Je m'appelle Dima et je suis développeur", None) {
92 Ok(probs) => {
93 println!("\tFrench text probabilities:");
94 for lang in probs {
95 println!("\t\t{}: {:.3}", lang.lang.unwrap_or_default(), lang.prob);
96 }
97 }
98 Err(e) => println!("Probability error: {:?}", e),
99 }
100}
5fn main() {
6 // Load language profiles from the crate's profiles directory
7 let profiles_dir = DetectorFactory::get_default_profiles_path();
8
9 println!("Read all default JSON profiles from {}", profiles_dir.display());
10
11 let mut lang_profiles = vec![];
12 // Load every profile in the directory
13 for entry in std::fs::read_dir(&profiles_dir).unwrap() {
14 let entry = entry.unwrap();
15 let path = entry.path();
16 if !path.is_file() {
17 continue;
18 }
19 let lang_json = LangProfileJson::new_from_file(path);
20 match &lang_json {
21 Ok(_) => println!("\tRead {} JSON profile", entry.file_name().to_string_lossy()),
22 Err(e) => {
23 println!("Error reading {} JSON profile: {:?}", entry.file_name().to_string_lossy(), e);
24 return;
25 }
26 }
27 let lang_profile = match LangProfile::from_json(lang_json.unwrap()) {
28 Ok(profile) => profile,
29 Err(e) => {
30 println!("Error creating {} LangProfile: {}", entry.file_name().to_string_lossy(), e);
31 return;
32 }
33 };
34 lang_profiles.push(lang_profile);
35 }
36
37 println!("Adding all default languages profiles to the factory...");
38
39 // Create an EMPTY factory
40 let mut factory = DetectorFactory::new().build();
41
42 // Get number of profiles to set final size
43 let mut profile_count = lang_profiles.len();
44 // Since we know that we are going to add another profile which is not in the default set
45 // we increase the final size by 1
46 profile_count += 1;
47
48 println!("Final size (assuming we are going to extend default set) of languages array will be: {}", profile_count);
49
50 for (i, profile) in lang_profiles.into_iter().enumerate() {
51 let profile_name = if let Some(name) = &profile.name {
52 name.clone()
53 } else {
54 "unknown".to_string()
55 };
56 println!("\tAdding profile: {} at index {}", profile_name, i);
57 if let Err(e) = factory.add_profile(profile, i, profile_count) {
58 println!("Error adding {} profile: {:?}", profile_name, e);
59 return;
60 }
61 }
62
63 // Load another profile (in documentation for generating profiles it is Sakha (Yakut) language - "sah")
64 let sah_path = Path::new("./scripts/datasets/generated").join("sah_generated.json");
65 let sah_json = LangProfileJson::new_from_file(sah_path);
66 match &sah_json {
67 Ok(_) => println!("Read Sakha JSON profile"),
68 Err(e) => {
69 println!("Error reading Sakha JSON profile: {:?}", e);
70 return;
71 }
72 }
73 let sah_profile = match LangProfile::from_json(sah_json.unwrap()) {
74 Ok(profile) => profile,
75 Err(e) => {
76 println!("Error creating Sakha LangProfile: {}", e);
77 return;
78 }
79 };
80 println!("Adding Sakha language profile to the factory");
81 if let Err(e) = factory.add_profile(sah_profile, profile_count - 1, profile_count) {
82 println!("Error adding Sakha profile: {:?}", e);
83 return;
84 }
85
86 println!("Testing language detection...");
87 // Test Russian text
88 match factory.detect("В своём глазу бревна не замечает, а в чужом соломинку видит", None) {
89 Ok(lang) => println!("\tRussian text detected as: {}", lang),
90 Err(e) => println!("Detection error: {:?}", e),
91 }
92
93 // Test English text
94 match factory.detect("He pays no attention to the plank in his own eye", None) {
95 Ok(lang) => println!("\tEnglish text detected as: {}", lang),
96 Err(e) => println!("Detection error: {:?}", e),
97 }
98
99 // Test Sakha (Yakut) text
100 match factory.detect("Айаҕыттан тахсар сытыканы билбэт.", None) {
101 Ok(lang) => println!("\tSakha text detected as: {}", lang),
102 Err(e) => println!("Detection error: {:?}", e),
103 }
104}
Source
pub fn get_probabilities(
    &self,
    text: &str,
    alpha: Option<f64>,
) -> Result<Vec<Language>, DetectorError>
Shortcut method to get language probabilities from text in one call.
§Arguments
- `text` - The text to analyze.
- `alpha` - Optional alpha smoothing parameter.
§Returns
A vector of languages with their probabilities, sorted by probability descending.
§Example
use langdetect_rs::detector_factory::DetectorFactory;
let factory = DetectorFactory::default().build();
let result = factory.get_probabilities("Hello world!", None);
Examples found in repository?
3fn main() {
4 let factory = DetectorFactory::default().build();
5
6 // let mut detector = factory.create(None);
7 match factory.detect("War doesn't show who's right, just who's left.", None) {
8 Ok(lang) => println!("Detected language: {}", lang),
9 Err(e) => println!("Detection error: {:?}", e),
10 }
11
12 // let mut detector = factory.create(None);
13 match factory.detect("Ein, zwei, drei, vier", None) {
14 Ok(lang) => println!("Detected language: {}", lang),
15 Err(e) => println!("Detection error: {:?}", e),
16 }
17
18 match factory.get_probabilities("Otec matka syn.", None) {
19 Ok(probs) => println!("Language probabilities: {:?}", probs),
20 Err(e) => println!("Detection error: {:?}", e),
21 }
22
23 // For reproducibility use a fixed seed within explicitly defined detector
24 let mut detector = factory.create(None);
25 detector.seed = Some(42);
26 detector.append("Otec matka syn.");
27 match detector.get_probabilities() {
28 Ok(probs) => println!("Language probabilities with seed: {:?}", probs),
29 Err(e) => println!("Detection error: {:?}", e),
30 }
31
32 // Or you can set the seed for the factory itself and it will be inherited by detectors
33 let factory_with_seed = DetectorFactory::default()
34 .with_seed(Some(43))
35 .build();
36 match factory_with_seed.get_probabilities("Otec matka syn.", None) {
37 Ok(probs) => println!("Language probabilities with seed: {:?}", probs),
38 Err(e) => println!("Detection error: {:?}", e),
39 }
40}
More examples
5fn main() {
6 // Create an empty factory
7 let mut factory = DetectorFactory::new().build();
8
9 // Load language profiles from the crate's profiles directory
10 let profiles_dir = Path::new("./").join("profiles");
11
12 println!("Read JSON profiles from {}", profiles_dir.display());
13
14 // Load Russian profile
15 let ru_json = LangProfileJson::new_from_file(profiles_dir.join("ru"));
16 match &ru_json {
17 Ok(_) => println!("\tRead Russian JSON profile"),
18 Err(e) => {
19 println!("Error reading Russian JSON profile: {:?}", e);
20 return;
21 }
22 }
23 let ru_profile = match LangProfile::from_json(ru_json.unwrap()) {
24 Ok(profile) => profile,
25 Err(e) => {
26 println!("Error creating Russian LangProfile: {}", e);
27 return;
28 }
29 };
30
31 // Load English profile
32 let en_json = LangProfileJson::new_from_file(profiles_dir.join("en"));
33 match &en_json {
34 Ok(_) => println!("\tRead English JSON profile"),
35 Err(e) => {
36 println!("Error reading English JSON profile: {:?}", e);
37 return;
38 }
39 }
40 let en_profile = match LangProfile::from_json(en_json.unwrap()) {
41 Ok(profile) => profile,
42 Err(e) => {
43 println!("Error creating English LangProfile: {}", e);
44 return;
45 }
46 };
47
48 println!("Adding custom language profiles to the factory...");
49 // Add profiles to the factory
50 // Make sure to use correct language IDs as per your profiles
51 // And provide correct FINAL size of languages array
52 let final_size = 2; // Update this if you add more profiles
53 if let Err(e) = factory.add_profile(ru_profile, 0, final_size
54 ) {
55 println!("Error adding Russian profile: {:?}", e);
56 return;
57 }
58 println!("\tLoaded Russian profile");
59 if let Err(e) = factory.add_profile(en_profile, 1, final_size) {
60 println!("Error adding English profile: {:?}", e);
61 return;
62 }
63 println!("\tLoaded English profile");
64
65 println!("Factory loaded with {} languages: {:?}", factory.get_lang_list().len(), factory.get_lang_list());
66
67 println!("Testing language detection...");
68
69 // Test Russian text
70 match factory.detect("Привет, меня зовут Дима, и я разработчик", None) {
71 Ok(lang) => println!("\tRussian text detected as: {}", lang),
72 Err(e) => println!("Detection error: {:?}", e),
73 }
74
75 // Test English text
76 match factory.detect("Hello world! My name is Dima and I am a developer", None) {
77 Ok(lang) => println!("\tEnglish text detected as: {}", lang),
78 Err(e) => println!("Detection error: {:?}", e),
79 }
80
81 // Test French text (will be detected as the closest match from available languages)
82 // IMPORTANT: The algorithm always returns the best guess from loaded languages, never fails
83 // EXCEPTIONS: Returns error if no recognizable n-grams found, or "unknown" if all probabilities ≤ 0.1
84 // If you want to detect "unknown" languages, check probability thresholds or handle the error cases
85 match factory.detect("Bonjour tout le monde! Je m'appelle Dima et je suis développeur", None) {
86 Ok(lang) => println!("\tFrench text detected as: {} (closest match from ru/en)", lang),
87 Err(e) => println!("Detection error: {:?}", e),
88 }
89
90 // Show probabilities for the French text to see why it was classified as English
91 match factory.get_probabilities("Bonjour tout le monde! Je m'appelle Dima et je suis développeur", None) {
92 Ok(probs) => {
93 println!("\tFrench text probabilities:");
94 for lang in probs {
95 println!("\t\t{}: {:.3}", lang.lang.unwrap_or_default(), lang.prob);
96 }
97 }
98 Err(e) => println!("Probability error: {:?}", e),
99 }
100}
Source
pub fn load_profile<P: AsRef<Path>>(
    &mut self,
    profile_directory: P,
) -> Result<(), String>
Loads all language profiles from a directory of JSON files.
§Arguments
- `profile_directory` - Path to directory containing JSON profile files.
§Returns
Ok(()) on success, or an error string on failure.
§Example
use langdetect_rs::detector_factory::DetectorFactory;
let mut factory = DetectorFactory::new().build();
factory.load_profile("profiles/").unwrap();
Trait Implementations§
Source§
impl Clone for DetectorFactory
Source§
fn clone(&self) -> DetectorFactory
1.0.0 · Source§
fn clone_from(&mut self, source: &Self)
Performs copy-assignment from `source`. Read more