1use scirs2_text::{
7 DictionaryCorrector, ErrorModel, SpellingCorrector, StatisticalCorrector,
8 StatisticalCorrectorConfig,
9};
10use std::time::Instant;
11
/// Demo paragraph containing deliberate spelling mistakes.
///
/// Besides plain typos ("bnk", "recieved", "mesage", "disply", "artefcts") it
/// also contains correctly spelled words used in the wrong place
/// ("complements", "Their"/"there"). It is the input for the context-aware
/// demo and, repeated, for the performance test.
const TEXT_WITH_CONTEXT_MISSPELLINGS: &str = concat!(
    "I went to the bnk to deposit some money. ",
    "The river bnk was muddy after the rain. ",
    "I recieved your mesage about the meeting. ",
    "He recieved many complements on his work. ",
    "Their was a problem with there computer. ",
    "The museum disply had many historical artefcts.",
);
17
/// The misspelled demo paragraph as it should read after correction.
///
/// Printed next to the corrector's actual output so the two can be compared
/// by eye; nothing in this file checks it programmatically.
const EXPECTED_CORRECTED_TEXT: &str = concat!(
    "I went to the bank to deposit some money. ",
    "The river bank was muddy after the rain. ",
    "I received your message about the meeting. ",
    "He received many compliments on his work. ",
    "There was a problem with their computer. ",
    "The museum display had many historical artifacts.",
);
23
/// Correctly spelled corpus handed to the statistical corrector as training
/// text.
///
/// It uses the words that the misspelled demo paragraph gets wrong ("bank",
/// "received", "message", "compliments", "there"/"their", "display",
/// "artifacts") in ordinary sentences.
const SAMPLE_TRAINING_TEXT: &str = concat!(
    "I went to the bank to deposit some money yesterday. ",
    "The bank offers good interest rates. ",
    "The river bank was muddy after the rain. ",
    "We sat on the bank of the river and watched the sunset. ",
    "I received your message about the meeting. ",
    "Thank you for the message you sent. ",
    "He received many compliments on his work. ",
    "The teacher gave compliments to the students. ",
    "There was a problem with their computer. ",
    "Their car broke down on the highway. ",
    "The museum display had many historical artifacts. ",
    "The ancient artifacts were well preserved. ",
    "The display was impressive and educational.",
);
33
34#[allow(dead_code)]
35fn main() -> Result<(), Box<dyn std::error::Error>> {
36 println!("Statistical Spelling Correction Demo\n");
37
38 let dict_corrector = DictionaryCorrector::default();
40
41 let mut stat_corrector = StatisticalCorrector::default();
43
44 train_language_model(&mut stat_corrector);
46
47 add_example_words(&mut stat_corrector);
49
50 compare_correctors(&dict_corrector, &stat_corrector)?;
52
53 context_aware_correction_demo(&stat_corrector)?;
55
56 performance_test(&dict_corrector, &stat_corrector)?;
58
59 configuration_demo()?;
61
62 noise_model_demo()?;
64
65 Ok(())
66}
67
68#[allow(dead_code)]
70fn train_language_model(corrector: &mut StatisticalCorrector) {
71 println!("Training language model with sample text...");
72
73 corrector.add_trainingtext(SAMPLE_TRAINING_TEXT);
75
76 let additional_examples = [
78 "I went to the bank to deposit money.",
80 "The bank is open until 5pm.",
81 "She works at the bank downtown.",
82 "I need to check my bank account.",
83 "We sat on the bank of the river.",
85 "The river bank was covered with flowers.",
86 "They fished from the bank of the lake.",
87 "The boat was tied to the bank.",
88 "There is a book on the table.",
90 "Their house is very beautiful.",
91 "They're going to the movies tonight.",
92 "There was a problem with the system.",
93 "Their car broke down yesterday.",
94 "They're planning a vacation next month.",
95 "He received many compliments on his presentation.",
97 "She gave him a compliment about his new haircut.",
98 "Red and green are complementary colors.",
99 "This wine complements the meal perfectly.",
100 "I received your message yesterday.",
102 "Please send me a message when you arrive.",
103 "The message was unclear and confusing.",
104 "She left a message on my voicemail.",
105 ];
106
107 for example in &additional_examples {
108 corrector.add_trainingtext(example);
109 }
110
111 println!(
112 "Language model trained with {} words vocabulary\n",
113 corrector.vocabulary_size()
114 );
115}
116
117#[allow(dead_code)]
119fn add_example_words(corrector: &mut StatisticalCorrector) {
120 let word_frequencies = [
122 ("bank", 100),
124 ("river", 100),
125 ("deposit", 100),
126 ("money", 100),
127 ("received", 100),
128 ("message", 100),
129 ("meeting", 100),
130 ("compliments", 100),
131 ("complements", 100),
132 ("work", 100),
133 ("there", 100),
134 ("their", 100),
135 ("they're", 100),
136 ("was", 100),
137 ("problem", 100),
138 ("computer", 100),
139 ("museum", 100),
140 ("display", 100),
141 ("historical", 100),
142 ("artifacts", 100),
143 ];
144
145 for (word, freq) in &word_frequencies {
146 corrector.add_word(word, *freq);
147 }
148}
149
150#[allow(dead_code)]
152fn compare_correctors(
153 dict_corrector: &DictionaryCorrector,
154 stat_corrector: &StatisticalCorrector,
155) -> Result<(), Box<dyn std::error::Error>> {
156 println!("=== Dictionary vs. Statistical Correction ===\n");
157
158 let test_cases = [
160 ("recieve", "receive"),
161 ("mesage", "message"),
162 ("bnk", "bank"),
163 ("thier", "their"),
164 ("complements", "compliments"), ("artefacts", "artifacts"),
166 ("disply", "display"),
167 ("definately", "definitely"),
168 ];
169
170 println!(
171 "{:<15} {:<15} {:<15}",
172 "Misspelled", "Dictionary", "Statistical"
173 );
174 println!("{:-<45}", "");
175
176 for (misspelled, _expected) in &test_cases {
177 let dict_correction = dict_corrector.correct(misspelled)?;
178 let stat_correction = stat_corrector.correct(misspelled)?;
179
180 println!("{misspelled:<15} {dict_correction:<15} {stat_correction:<15}");
181 }
182
183 println!("\nDictionary sizes:");
184 println!(
185 " - Dictionary _corrector: {} words",
186 dict_corrector.dictionary_size()
187 );
188 println!(
189 " - Statistical _corrector: {} words (+ {} in language model)",
190 stat_corrector.dictionary_size(),
191 stat_corrector.vocabulary_size()
192 );
193
194 Ok(())
195}
196
197#[allow(dead_code)]
199fn context_aware_correction_demo(
200 corrector: &StatisticalCorrector,
201) -> Result<(), Box<dyn std::error::Error>> {
202 println!("\n=== Context-Aware Correction Demo ===\n");
203
204 println!("Original text with misspellings:");
205 println!("{TEXT_WITH_CONTEXT_MISSPELLINGS}\n");
206
207 let correctedtext = corrector.correcttext(TEXT_WITH_CONTEXT_MISSPELLINGS)?;
209
210 println!("Corrected text:");
211 println!("{correctedtext}\n");
212
213 println!("Expected text:");
214 println!("{EXPECTED_CORRECTED_TEXT}\n");
215
216 println!("Specific context examples:\n");
218
219 let text1 = "I went to the bnk to deposit some money.";
221 let text2 = "The river bnk was muddy after the rain.";
222
223 println!("Example 1: 'bnk' in financial context");
224 println!("Before: {text1}");
225 println!("After: {}\n", corrector.correcttext(text1)?);
226
227 println!("Example 2: 'bnk' in geographical context");
228 println!("Before: {text2}");
229 println!("After: {}\n", corrector.correcttext(text2)?);
230
231 let text3 = "Their was a problem with the computer.";
233 let text4 = "There car broke down on the highway.";
234
235 println!("Example 3: 'their' used incorrectly");
236 println!("Before: {text3}");
237 println!("After: {}\n", corrector.correcttext(text3)?);
238
239 println!("Example 4: 'there' used incorrectly");
240 println!("Before: {text4}");
241 println!("After: {}\n", corrector.correcttext(text4)?);
242
243 Ok(())
244}
245
246#[allow(dead_code)]
248fn performance_test(
249 dict_corrector: &DictionaryCorrector,
250 stat_corrector: &StatisticalCorrector,
251) -> Result<(), Box<dyn std::error::Error>> {
252 println!("\n=== Performance Test ===\n");
253
254 let testtext = TEXT_WITH_CONTEXT_MISSPELLINGS.repeat(10);
256
257 let start = Instant::now();
259 let _ = dict_corrector.correcttext(&testtext)?;
260 let dict_time = start.elapsed();
261
262 let start = Instant::now();
264 let _ = stat_corrector.correcttext(&testtext)?;
265 let stat_time = start.elapsed();
266
267 let non_context_config = StatisticalCorrectorConfig {
269 use_context: false,
270 ..Default::default()
271 };
272 let mut non_context_corrector = StatisticalCorrector::new(non_context_config);
273
274 train_language_model(&mut non_context_corrector);
276 add_example_words(&mut non_context_corrector);
277
278 let start = Instant::now();
280 let _ = non_context_corrector.correcttext(&testtext)?;
281 let non_context_time = start.elapsed();
282
283 println!(
284 "Performance comparison on text with {} characters:",
285 testtext.len()
286 );
287 println!(" - Dictionary corrector: {dict_time:?}");
288 println!(" - Statistical _corrector (without context): {non_context_time:?}");
289 println!(" - Statistical _corrector (with context): {stat_time:?}");
290
291 Ok(())
292}
293
294#[allow(dead_code)]
296fn configuration_demo() -> Result<(), Box<dyn std::error::Error>> {
297 println!("\n=== Configuration Options Demo ===\n");
298
299 let configs = [
301 ("Default", StatisticalCorrectorConfig::default()),
302 (
303 "Conservative (max_edit_distance=1)",
304 StatisticalCorrectorConfig {
305 max_edit_distance: 1,
306 ..Default::default()
307 },
308 ),
309 (
310 "Aggressive (max_edit_distance=3)",
311 StatisticalCorrectorConfig {
312 max_edit_distance: 3,
313 ..Default::default()
314 },
315 ),
316 (
317 "Language model focused (weight=0.9)",
318 StatisticalCorrectorConfig {
319 language_model_weight: 0.9,
320 edit_distance_weight: 0.1,
321 ..Default::default()
322 },
323 ),
324 (
325 "Edit distance focused (weight=0.9)",
326 StatisticalCorrectorConfig {
327 language_model_weight: 0.1,
328 edit_distance_weight: 0.9,
329 ..Default::default()
330 },
331 ),
332 (
333 "No context",
334 StatisticalCorrectorConfig {
335 use_context: false,
336 ..Default::default()
337 },
338 ),
339 ];
340
341 let test_cases = [
343 "recieve", "accidant", "programing", "thier", "complements", ];
349
350 for (name, config) in &configs {
352 let mut corrector = StatisticalCorrector::new(config.clone());
353
354 train_language_model(&mut corrector);
356 add_example_words(&mut corrector);
357
358 println!("{name} configuration:");
359 println!(" max_editdistance: {}", config.max_edit_distance);
360 println!(" language_modelweight: {}", config.language_model_weight);
361 println!(" edit_distanceweight: {}", config.edit_distance_weight);
362 println!(" usecontext: {}", config.use_context);
363
364 println!("\n Correction examples:");
365 for word in &test_cases {
366 let corrected = corrector.correct(word)?;
367 println!(" {word} -> {corrected}");
368 }
369
370 if config.use_context {
372 let context_example = "Going to the bnk to deposit money. The river bnk was muddy.";
373 let corrected = corrector.correcttext(context_example)?;
374 println!("\n Context example:");
375 println!(" Before: {context_example}");
376 println!(" After: {corrected}");
377 }
378
379 println!("\n{:-<60}", "");
380 }
381
382 Ok(())
383}
384
385#[allow(dead_code)]
387fn noise_model_demo() -> Result<(), Box<dyn std::error::Error>> {
388 println!("\n=== Error Model Demo ===\n");
389
390 let models = [
392 ("Default", ErrorModel::default()),
393 ("Deletion-heavy", ErrorModel::new(0.7, 0.1, 0.1, 0.1)),
394 ("Insertion-heavy", ErrorModel::new(0.1, 0.7, 0.1, 0.1)),
395 ("Substitution-heavy", ErrorModel::new(0.1, 0.1, 0.7, 0.1)),
396 ("Transposition-heavy", ErrorModel::new(0.1, 0.1, 0.1, 0.7)),
397 ];
398
399 let test_pairs = [
401 ("recieve", "receive"), ("acheive", "achieve"), ("languge", "language"), ("programing", "programming"), ("probblem", "problem"), ("committe", "committee"), ("definately", "definitely"), ("seperate", "separate"), ];
410
411 println!(
413 "{:<20} {:<12} {:<12} {:<12} {:<12} {:<12}",
414 "Model", "Delete Prob", "Insert Prob", "Subst Prob", "Transp Prob", "Example"
415 );
416 println!("{:-<80}", "");
417
418 for (name, model) in &models {
419 let (typo, correct) = test_pairs[0];
421 let probability = model.error_probability(typo, correct);
422
423 println!(
424 "{:<20} {:<12.2} {:<12.2} {:<12.2} {:<12.2} {:<12.4}",
425 name,
426 model.p_deletion,
427 model.p_insertion,
428 model.p_substitution,
429 model.p_transposition,
430 probability
431 );
432 }
433
434 println!("\nError probabilities for different error types (using default model):");
435
436 let default_model = ErrorModel::default();
437
438 for (typo, correct) in &test_pairs {
439 let prob = default_model.error_probability(typo, correct);
440 println!("{typo:<12} -> {correct:<12}: {prob:.6}");
441 }
442
443 println!("\nImpact on correction with custom error model:");
444
445 let custom_config = StatisticalCorrectorConfig {
447 language_model_weight: 0.3,
448 edit_distance_weight: 0.7,
449 ..Default::default()
450 };
451
452 let mut custom_corrector = StatisticalCorrector::new(custom_config);
453 train_language_model(&mut custom_corrector);
454 add_example_words(&mut custom_corrector);
455
456 let transposition_model = ErrorModel::new(0.1, 0.1, 0.1, 0.7);
458 custom_corrector.set_error_model(transposition_model);
459
460 println!("\nCorrecting text with transposition-heavy error model:");
462 let testtext = "I recieved a mesage about thier acheivements.";
463 let corrected = custom_corrector.correcttext(testtext)?;
464
465 println!("Before: {testtext}");
466 println!("After: {corrected}");
467
468 Ok(())
469}