use phf::phf_set;
use std::path::{Path, PathBuf};

#[doc(hidden)]
mod cli;
mod dictionary;
mod model;
mod passwords;

pub use model::{
    check_token_status, default_model_path, download_model, download_model_with_progress_bar,
    model_exists, ModelError, TokenStatus,
};

#[doc(hidden)]
pub use cli::download_with_progress_bar;

/// Controls how aggressively text is classified as gibberish.
#[derive(Debug, Clone, Copy, PartialEq)]
pub enum Sensitivity {
    /// Flags the least text as gibberish: weak English signals are enough
    /// for text to be accepted as English.
    High,

    /// Balanced detection, suitable as a default.
    Medium,

    /// Flags the most text as gibberish: only text with strong English
    /// signals is accepted as English.
    Low,
}

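/// Detector for gibberish text that combines the heuristic checks in this
/// module with an optional on-disk model for enhanced detection.
///
/// Illustrative usage only; the crate path below is an assumption, so the
/// example is not compiled as a doctest.
///
/// ```ignore
/// // Hypothetical crate path; adjust to this crate's actual name.
/// use gibberish_or_not::{GibberishDetector, Sensitivity};
///
/// let detector = GibberishDetector::new();
/// assert!(!detector.is_gibberish(
///     "The quick brown fox jumps over the lazy dog.",
///     Sensitivity::Medium
/// ));
/// assert!(detector.is_gibberish("!@#$%^&*()", Sensitivity::Medium));
/// ```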
pub struct GibberishDetector {
    model_path: Option<PathBuf>,
}

impl GibberishDetector {
    /// Creates a detector that uses only the built-in heuristic checks.
    pub fn new() -> Self {
        Self { model_path: None }
    }

    /// Creates a detector that will additionally use the model stored at
    /// `path`, if that file exists.
    pub fn with_model<P: AsRef<Path>>(path: P) -> Self {
        Self {
            model_path: Some(path.as_ref().to_path_buf()),
        }
    }

    /// Returns `true` if a model path was configured and the model file exists.
    pub fn has_enhanced_detection(&self) -> bool {
        self.model_path
            .as_ref()
            .map(|p| model::Model::exists(p))
            .unwrap_or(false)
    }

    /// Returns `true` if `text` looks like gibberish. The heuristic checks run
    /// first; if they do not already flag the text and a model file is
    /// available, the model's prediction is returned instead.
    pub fn is_gibberish(&self, text: &str, sensitivity: Sensitivity) -> bool {
        if is_string_bad_quality(text) {
            return true;
        }

        let basic_result = run_basic_checks(text, sensitivity);

        if basic_result {
            return true;
        }

        if let Some(path) = &self.model_path {
            if let Some(model) = model::Model::get_or_load(path) {
                return model.predict(text);
            }
        }

        basic_result
    }
}

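/// Quick quality gate: returns `true` when more than half of the characters
/// are non-printable or when the input is at most three bytes long.
///
/// A minimal sketch of its behaviour (crate path assumed, so not compiled):
///
/// ```ignore
/// use gibberish_or_not::is_string_bad_quality;
///
/// assert!(is_string_bad_quality("hi")); // too short
/// assert!(!is_string_bad_quality("The quick brown fox"));
/// ```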
pub fn is_string_bad_quality(s: &str) -> bool {
    let non_printable_ratio = calculate_non_printable_ratio(s);

    if non_printable_ratio > 0.5 {
        return true;
    }

    if s.len() <= 3 {
        return true;
    }

    false
}

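/// Ratio of non-printable (or non-ASCII) characters to total characters, in
/// the range 0.0 to 1.0. Newlines, carriage returns, and tabs count as
/// printable.
///
/// A minimal sketch of its behaviour (crate path assumed, so not compiled):
///
/// ```ignore
/// use gibberish_or_not::calculate_non_printable_ratio;
///
/// assert_eq!(calculate_non_printable_ratio("hello"), 0.0);
/// assert_eq!(calculate_non_printable_ratio("\u{1}\u{2}ab"), 0.5);
/// ```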
pub fn calculate_non_printable_ratio(text: &str) -> f32 {
    // Guard against division by zero (and a NaN result) on empty input.
    if text.is_empty() {
        return 0.0;
    }

    let total_chars = text.chars().count();
    let non_printable_count = text
        .chars()
        .filter(|&c| (c.is_control() && c != '\n' && c != '\r' && c != '\t') || !c.is_ascii())
        .count();

    // Divide by the character count (not the byte length) so the ratio stays
    // consistent for multi-byte UTF-8 input.
    non_printable_count as f32 / total_chars as f32
}

fn is_english_word(word: &str) -> bool {
    dictionary::ENGLISH_WORDS.contains(word)
}

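/// Returns `true` if `text` exactly matches an entry in the bundled list of
/// common passwords.
///
/// A minimal sketch of its behaviour (crate path assumed, so not compiled):
///
/// ```ignore
/// use gibberish_or_not::is_password;
///
/// assert!(is_password("123456"));
/// assert!(!is_password("this is not a password"));
/// ```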
pub fn is_password(text: &str) -> bool {
    passwords::PASSWORDS.contains(text)
}
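
/// Convenience wrapper that runs [`GibberishDetector::is_gibberish`] without
/// an enhanced-detection model.
///
/// Illustrative usage (crate path assumed, so not compiled as a doctest):
///
/// ```ignore
/// use gibberish_or_not::{is_gibberish, Sensitivity};
///
/// assert!(!is_gibberish("This is a simple English sentence.", Sensitivity::Medium));
/// assert!(is_gibberish("MOTCk4ywLLjjEE2=", Sensitivity::Medium));
/// ```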
pub fn is_gibberish(text: &str, sensitivity: Sensitivity) -> bool {
    GibberishDetector::new().is_gibberish(text, sensitivity)
}

fn run_basic_checks(text: &str, sensitivity: Sensitivity) -> bool {
    let cleaned = clean_text(text);

    if cleaned.is_empty() {
        return true;
    }

    // Very short inputs are judged purely by dictionary lookup.
    if cleaned.len() < 10 {
        let is_english = is_english_word(&cleaned);
        return !is_english;
    }

    let words: Vec<&str> = cleaned
        .split_whitespace()
        .filter(|word| !word.is_empty())
        .collect();

    let english_words: Vec<&&str> = words.iter().filter(|w| is_english_word(w)).collect();
    let english_word_count = english_words.len();
    let english_word_ratio = if words.is_empty() {
        0.0
    } else {
        english_word_count as f64 / words.len() as f64
    };

    let non_printable_count = text
        .chars()
        .filter(|&c| c < ' ' && c != '\n' && c != '\r' && c != '\t')
        .count();

    if non_printable_count > 0 {
        return true;
    }

    let entropy = calculate_entropy(text);
    let transition_score = calculate_transition_score(text);
    let vowel_consonant_ratio = calculate_vowel_consonant_ratio(&cleaned);

    let trigrams = generate_ngrams(&cleaned, 3);
    let quadgrams = generate_ngrams(&cleaned, 4);

    let valid_trigrams = trigrams
        .iter()
        .filter(|gram| COMMON_TRIGRAMS.contains(gram.as_str()))
        .collect::<Vec<_>>();

    let valid_quadgrams = quadgrams
        .iter()
        .filter(|gram| COMMON_QUADGRAMS.contains(gram.as_str()))
        .collect::<Vec<_>>();

    let trigram_score = if trigrams.is_empty() {
        0.0
    } else {
        valid_trigrams.len() as f64 / trigrams.len() as f64
    };

    let quadgram_score = if quadgrams.is_empty() {
        0.0
    } else {
        valid_quadgrams.len() as f64 / quadgrams.len() as f64
    };

    // Weighted composite of the individual signals.
    let mut composite_score = 0.0;
    composite_score += english_word_ratio * 0.4;
    composite_score += transition_score * 0.25;
    composite_score += trigram_score * 0.15;
    composite_score += quadgram_score * 0.1;
    composite_score += if (0.3..=0.7).contains(&vowel_consonant_ratio) {
        0.1
    } else {
        0.0
    };

    // Penalise texts whose character entropy falls outside the range typical
    // of English prose.
    if !(3.5..=4.5).contains(&entropy) {
        composite_score *= 0.8;
    }

    // Scale the decision threshold by text length.
    let length_factor = match cleaned.len() {
        0..=20 => 0.7,
        21..=50 => 0.8,
        51..=100 => 0.9,
        101..=200 => 1.0,
        _ => 1.1,
    };

    let threshold = match sensitivity {
        Sensitivity::Low => 0.35 * length_factor,
        Sensitivity::Medium => 0.25 * length_factor,
        Sensitivity::High => 0.15 * length_factor,
    };

    // Early decisions that short-circuit the composite score.
    if entropy > 4.5 && sensitivity != Sensitivity::High {
        return true;
    }

    if english_word_ratio > 0.8 {
        return false;
    }

    if english_word_count >= 3 && sensitivity != Sensitivity::Low {
        return false;
    }

    if english_word_count == 0 && transition_score < 0.4 && sensitivity != Sensitivity::High {
        return true;
    }

    composite_score < threshold
}

/// Shannon entropy of the character distribution, in bits per character.
fn calculate_entropy(text: &str) -> f64 {
    let text = text.to_lowercase();
    let total_chars = text.chars().count() as f64;

    if total_chars == 0.0 {
        return 0.0;
    }

    let mut char_counts = std::collections::HashMap::new();
    for c in text.chars() {
        *char_counts.entry(c).or_insert(0) += 1;
    }

    let mut entropy = 0.0;
    for &count in char_counts.values() {
        let probability = count as f64 / total_chars;
        entropy -= probability * probability.log2();
    }

    entropy
}

/// Fraction of adjacent character pairs that are common English bigrams.
fn calculate_transition_score(text: &str) -> f64 {
    let text = text.to_lowercase();
    let chars: Vec<char> = text.chars().collect();

    if chars.len() < 2 {
        return 0.0;
    }

    let mut valid_transitions = 0;
    let total_transitions = chars.len() - 1;

    for i in 0..total_transitions {
        let pair = format!("{}{}", chars[i], chars[i + 1]);
        if COMMON_CHAR_PAIRS.contains(&pair.as_str()) {
            valid_transitions += 1;
        }
    }

    valid_transitions as f64 / total_transitions as f64
}

/// Proportion of vowels among alphabetic characters (0.0 to 1.0).
fn calculate_vowel_consonant_ratio(text: &str) -> f64 {
    let vowels = ['a', 'e', 'i', 'o', 'u'];
    let mut vowel_count = 0;
    let mut consonant_count = 0;

    for c in text.chars() {
        if vowels.contains(&c) {
            vowel_count += 1;
        } else if c.is_alphabetic() {
            consonant_count += 1;
        }
    }

    if consonant_count == 0 {
        return if vowel_count == 0 { 0.0 } else { 1.0 };
    }

    vowel_count as f64 / (vowel_count + consonant_count) as f64
}

static COMMON_CHAR_PAIRS: phf::Set<&'static str> = phf_set! {
    "th", "he", "in", "er", "an", "re", "on", "at", "en", "nd",
    "ti", "es", "or", "te", "of", "ed", "is", "it", "al", "ar",
    "st", "to", "nt", "ng", "se", "ha", "as", "ou", "io", "le",
    "ve", "co", "me", "de", "hi", "ri", "ro", "ic", "ne", "ea",
    "ra", "ce", "li", "ch", "ll", "be", "ma", "si", "om", "ur"
};

static COMMON_QUADGRAMS: phf::Set<&'static str> = phf_set! {
    "tion", "atio", "that", "ther", "with", "ment", "ions", "this",
    "here", "from", "ould", "ting", "hich", "whic", "ctio", "ever",
    "they", "thin", "have", "othe", "were", "tive", "ough", "ight"
};

static COMMON_TRIGRAMS: phf::Set<&'static str> = phf_set! {
    "the", "and", "ing", "ion", "tio", "ent", "ati", "for", "her", "ter",
    "hat", "tha", "ere", "con", "res", "ver", "all", "ons", "nce", "men",
    "ith", "ted", "ers", "pro", "thi", "wit", "are", "ess", "not", "ive",
    "was", "ect", "rea", "com", "eve", "per", "int", "est", "sta", "cti",
    "ica", "ist", "ear", "ain", "one", "our", "iti", "rat", "ell", "ant"
};

static ENGLISH_LETTERS: phf::Set<char> = phf_set! {
    'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
    'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',
    'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
    'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z'
};

/// Keeps ASCII letters and digits (lowercased), maps whitespace, separators,
/// and basic punctuation to spaces, and ASCII-lowercases everything else.
fn clean_text(text: &str) -> String {
    text.chars()
        .map(|c| {
            if ENGLISH_LETTERS.contains(&c) || c.is_ascii_digit() {
                c.to_ascii_lowercase()
            } else if c.is_whitespace() || c == '_' || c == '-' || c == '/' {
                ' '
            } else if c == ',' || c == '.' || c == '!' || c == '?' {
                ' '
            } else {
                c.to_ascii_lowercase()
            }
        })
        .collect()
}

/// Builds all `n`-character windows of each word after filtering the text
/// down to letters and digits.
fn generate_ngrams(text: &str, n: usize) -> Vec<String> {
    let filtered: String = text
        .to_lowercase()
        .chars()
        .map(|ch| {
            if ENGLISH_LETTERS.contains(&ch) || ch.is_numeric() {
                ch
            } else {
                ' '
            }
        })
        .collect();

    filtered
        .split_whitespace()
        .flat_map(|word| {
            word.as_bytes()
                .windows(n)
                .filter_map(|window| String::from_utf8(window.to_vec()).ok())
        })
        .collect()
}

#[cfg(test)]
mod tests {
    use super::*;
    use log::{debug, info, warn};

    fn init_logger() {
        let _ = env_logger::builder()
            .filter_level(log::LevelFilter::Debug)
            .is_test(true)
            .try_init();
    }

    fn log_gibberish_analysis(text: &str) -> bool {
        info!("==== ANALYZING TEXT: '{}' ====", text);

        let cleaned = clean_text(text);
        debug!("Cleaned text: '{}'", cleaned);

        if cleaned.is_empty() {
            info!("RESULT: GIBBERISH - Text is empty after cleaning");
            return true;
        }

        if cleaned.len() < 10 {
            let is_english = is_english_word(&cleaned);
            debug!(
                "Short text check: Is '{}' an English word? {}",
                cleaned, is_english
            );
            if is_english {
                info!("RESULT: NOT GIBBERISH - Short text is an English word");
                return false;
            } else {
                info!("RESULT: GIBBERISH - Short text is not an English word");
                return true;
            }
        }

        let words: Vec<&str> = cleaned
            .split_whitespace()
            .filter(|word| !word.is_empty())
            .collect();

        debug!("Word count: {}", words.len());

        let english_words: Vec<&&str> = words.iter().filter(|w| is_english_word(w)).collect();
        debug!(
            "English words: {} ({:?})",
            english_words.len(),
            english_words
        );

        let english_word_count = english_words.len();
        let english_word_ratio = if words.is_empty() {
            0.0
        } else {
            english_word_count as f64 / words.len() as f64
        };
        debug!("English word ratio: {:.4}", english_word_ratio);

        let non_printable_count = text
            .chars()
            .filter(|&c| c < ' ' && c != '\n' && c != '\r' && c != '\t')
            .count();

        debug!("Non-printable character count: {}", non_printable_count);

        if non_printable_count > 0 {
            info!("RESULT: GIBBERISH - Contains non-printable characters");
            return true;
        }

        let entropy = calculate_entropy(text);
        debug!("Entropy score: {:.4}", entropy);

        let transition_score = calculate_transition_score(text);
        debug!("Transition score: {:.4}", transition_score);

        let vc_ratio = calculate_vowel_consonant_ratio(text);
        debug!("Vowel-consonant ratio: {:.4}", vc_ratio);

        let possible_words = (3..=cleaned.len().min(10))
            .flat_map(|len| {
                cleaned
                    .as_bytes()
                    .windows(len)
                    .map(|window| std::str::from_utf8(window).unwrap_or(""))
                    .filter(|w| is_english_word(w))
                    .collect::<Vec<_>>()
            })
            .collect::<Vec<_>>();

        debug!("English subwords found: {:?}", possible_words);

        let trigrams = generate_ngrams(&cleaned, 3);
        let quadgrams = generate_ngrams(&cleaned, 4);

        let valid_trigrams = trigrams
            .iter()
            .filter(|gram| COMMON_TRIGRAMS.contains(gram.as_str()))
            .collect::<Vec<_>>();

        let valid_quadgrams = quadgrams
            .iter()
            .filter(|gram| COMMON_QUADGRAMS.contains(gram.as_str()))
            .collect::<Vec<_>>();

        debug!("All trigrams: {:?}", trigrams);
        debug!("Valid trigrams: {:?}", valid_trigrams);

        let trigram_score = if trigrams.is_empty() {
            0.0
        } else {
            valid_trigrams.len() as f64 / trigrams.len() as f64
        };
        debug!("Trigram score: {:.4}", trigram_score);

        debug!("All quadgrams: {:?}", quadgrams);
        debug!("Valid quadgrams: {:?}", valid_quadgrams);

        let quadgram_score = if quadgrams.is_empty() {
            0.0
        } else {
            valid_quadgrams.len() as f64 / quadgrams.len() as f64
        };
        debug!("Quadgram score: {:.4}", quadgram_score);

        let english_word_threshold = 0.2;
        let trigram_threshold = 0.15;
        let quadgram_threshold = 0.1;
        let entropy_threshold = 4.5;
        let transition_threshold = 0.7;

        debug!(
            "English word ratio threshold check (> {}): {}",
            english_word_threshold,
            english_word_ratio > english_word_threshold
        );
        debug!(
            "Trigram score threshold check (> {}): {}",
            trigram_threshold,
            trigram_score > trigram_threshold
        );
        debug!(
            "Quadgram score threshold check (> {}): {}",
            quadgram_threshold,
            quadgram_score > quadgram_threshold
        );
        debug!(
            "Entropy threshold check (< {}): {}",
            entropy_threshold,
            entropy < entropy_threshold
        );
        debug!(
            "Transition score threshold check (> {}): {}",
            transition_threshold,
            transition_score > transition_threshold
        );

        let is_gibberish = !((english_word_ratio > english_word_threshold)
            || (english_word_count >= 3)
            || (trigram_score > trigram_threshold && quadgram_score > quadgram_threshold)
            || (transition_score > transition_threshold && entropy < entropy_threshold));

        if is_gibberish {
            info!("RESULT: GIBBERISH - Failed threshold checks");
        } else {
            info!("RESULT: NOT GIBBERISH - Passed threshold checks");
        }

        is_gibberish
    }

    #[test]
    fn test_common_passwords() {
        assert!(is_password("123456"));
        assert!(is_password("password"));
        assert!(is_password("qwerty"));
        assert!(is_password("abc123"));
    }

    #[test]
    fn test_numeric_passwords() {
        assert!(is_password("123456789"));
        assert!(is_password("12345678"));
        assert!(is_password("1234567"));
    }

    #[test]
    fn test_word_passwords() {
        assert!(is_password("iloveyou"));
        assert!(is_password("admin"));
        assert!(is_password("welcome"));
    }

    #[test]
    fn test_non_passwords() {
        assert!(!is_password(""));
        assert!(!is_password("this is not a password"));
        assert!(!is_password("verylongandunlikelypasswordthatnoonewoulduse"));
        assert!(!is_password("unique_string_123"));
    }

    fn test_with_sensitivities(
        text: &str,
        expected_low: bool,
        expected_med: bool,
        expected_high: bool,
    ) {
        assert_eq!(is_gibberish(text, Sensitivity::Low), expected_low);
        assert_eq!(is_gibberish(text, Sensitivity::Medium), expected_med);
        assert_eq!(is_gibberish(text, Sensitivity::High), expected_high);
    }

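    // Focused checks on the internal scoring helpers; the expected values
    // below follow directly from the helper definitions above.
    #[test]
    fn test_entropy_helper() {
        // A single repeated symbol carries no information.
        assert!(calculate_entropy("aaaa").abs() < 1e-9);
        // Two equally likely symbols give exactly one bit per character.
        assert!((calculate_entropy("abab") - 1.0).abs() < 1e-9);
        // Empty input is defined to have zero entropy.
        assert!(calculate_entropy("").abs() < 1e-9);
    }

    #[test]
    fn test_transition_score_helper() {
        // "th" and "he" are both common pairs, so every transition matches.
        assert!((calculate_transition_score("the") - 1.0).abs() < 1e-9);
        // Inputs shorter than two characters have no transitions to score.
        assert!(calculate_transition_score("a").abs() < 1e-9);
    }

    #[test]
    fn test_vowel_consonant_ratio_helper() {
        // All vowels and no consonants yields 1.0 by definition.
        assert!((calculate_vowel_consonant_ratio("aeiou") - 1.0).abs() < 1e-9);
        // No vowels at all yields 0.0.
        assert!(calculate_vowel_consonant_ratio("rhythm").abs() < 1e-9);
    }
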
    #[test]
    fn test_clear_english_all_sensitivities() {
        let text = "The quick brown fox jumps over the lazy dog.";
        println!("\nTesting text: '{}'", text);

        for sensitivity in [Sensitivity::Low, Sensitivity::Medium, Sensitivity::High] {
            let cleaned = clean_text(text);
            let words: Vec<&str> = cleaned.split_whitespace().collect();
            let english_words: Vec<&&str> =
                words.iter().filter(|word| is_english_word(word)).collect();

            let trigrams = generate_ngrams(&cleaned, 3);
            let quadgrams = generate_ngrams(&cleaned, 4);

            let valid_trigrams = trigrams
                .iter()
                .filter(|gram| COMMON_TRIGRAMS.contains(gram.as_str()))
                .collect::<Vec<_>>();
            let valid_quadgrams = quadgrams
                .iter()
                .filter(|gram| COMMON_QUADGRAMS.contains(gram.as_str()))
                .collect::<Vec<_>>();

            println!("\nSensitivity {:?}:", sensitivity);
            println!("Cleaned text: '{}'", cleaned);
            println!(
                "English words found: {} out of {}",
                english_words.len(),
                words.len()
            );
            println!("English words: {:?}", english_words);
            println!(
                "Trigram score: {:.3}",
                if trigrams.is_empty() {
                    0.0
                } else {
                    valid_trigrams.len() as f64 / trigrams.len() as f64
                }
            );
            println!(
                "Quadgram score: {:.3}",
                if quadgrams.is_empty() {
                    0.0
                } else {
                    valid_quadgrams.len() as f64 / quadgrams.len() as f64
                }
            );

            let result = is_gibberish(text, sensitivity);
            println!("Result: {}", if result { "GIBBERISH" } else { "ENGLISH" });
        }

        test_with_sensitivities(text, false, false, false);
    }

    #[test]
    fn test_borderline_english_like_gibberish() {
        init_logger();
        let text = "Rcl maocr otmwi lit dnoen oehc 13 iron seah.";

        info!("==== TESTING BORDERLINE ENGLISH LIKE GIBBERISH ====");
        let is_gibberish_result = log_gibberish_analysis(text);

        let lib_result = is_gibberish(text, Sensitivity::Medium);
        if is_gibberish_result != lib_result {
            warn!(
                "WARNING: Analysis result ({}) differs from library result ({})",
                is_gibberish_result, lib_result
            );
        }

        test_with_sensitivities(text, true, false, false);
    }

    #[test]
    fn test_english_without_spaces() {
        assert!(!is_gibberish(
            "HelloSkeletonsThisIsATestOfEnglishWithoutSpacesIHopeItWorks",
            Sensitivity::Medium
        ));
    }

    #[test]
    fn test_clear_gibberish_all_sensitivities() {
        test_with_sensitivities("!@#$%^&*()", true, true, true);
    }

    #[test]
    fn test_english_word_with_ngrams() {
        let text = "ther with tion";
        println!("\n==== DEBUG: test_english_word_with_ngrams ====");
        println!("Text: '{}'", text);

        let cleaned = clean_text(text);
        let words: Vec<&str> = cleaned.split_whitespace().collect();
        let english_words: Vec<&&str> = words.iter().filter(|w| is_english_word(w)).collect();

        println!("\n== Word Analysis ==");
        println!("Total words: {}", words.len());
        println!(
            "English words: {} ({:?})",
            english_words.len(),
            english_words
        );

        let trigrams = generate_ngrams(&cleaned, 3);
        let quadgrams = generate_ngrams(&cleaned, 4);

        let valid_trigrams = trigrams
            .iter()
            .filter(|gram| COMMON_TRIGRAMS.contains(gram.as_str()))
            .collect::<Vec<_>>();

        let valid_quadgrams = quadgrams
            .iter()
            .filter(|gram| COMMON_QUADGRAMS.contains(gram.as_str()))
            .collect::<Vec<_>>();

        let trigram_score = if trigrams.is_empty() {
            0.0
        } else {
            valid_trigrams.len() as f64 / trigrams.len() as f64
        };
        let quadgram_score = if quadgrams.is_empty() {
            0.0
        } else {
            valid_quadgrams.len() as f64 / quadgrams.len() as f64
        };

        println!("\n== N-gram Analysis ==");
        println!("Trigram score: {:.3}", trigram_score);
        println!("Quadgram score: {:.3}", quadgram_score);

        println!("\n== Test Assertion ==");
        println!("Should NOT classify as GIBBERISH with LOW sensitivity");
        assert!(
            !is_gibberish(text, Sensitivity::Low),
            "Text with common n-grams should not be classified as gibberish with low sensitivity"
        );
    }

    #[test]
    fn test_pangram() {
        assert!(!is_gibberish(
            "The quick brown fox jumps over the lazy dog.",
            Sensitivity::Medium
        ));
    }

    #[test]
    fn test_simple_sentence() {
        assert!(!is_gibberish(
            "This is a simple English sentence.",
            Sensitivity::Medium
        ));
    }

    #[test]
    fn test_hello_world() {
        init_logger();
        let text = "Hello, world!";

        info!("==== TESTING HELLO WORLD ====");
        let is_gibberish_result = log_gibberish_analysis(text);

        let lib_result = is_gibberish(text, Sensitivity::Medium);
        if is_gibberish_result != lib_result {
            warn!(
                "WARNING: Analysis result ({}) differs from library result ({})",
                is_gibberish_result, lib_result
            );
        }

        assert!(!is_gibberish(text, Sensitivity::Medium));
    }

    #[test]
    fn test_single_word() {
        assert!(!is_gibberish("hello", Sensitivity::Medium));
    }

    #[test]
    fn test_common_ngrams() {
        assert!(!is_gibberish("ther with tion", Sensitivity::Medium));
    }

    #[test]
    fn test_technical_text() {
        assert!(!is_gibberish(
            "The function returns a boolean value.",
            Sensitivity::Medium
        ));
    }

    #[test]
    fn test_mixed_case() {
        assert!(!is_gibberish(
            "MiXeD cAsE text IS still English",
            Sensitivity::Medium
        ));
    }

    #[test]
    fn test_with_punctuation() {
        assert!(!is_gibberish(
            "Hello! How are you? I'm doing well.",
            Sensitivity::Medium
        ));
    }

    #[test]
    fn test_long_text() {
        assert!(!is_gibberish("This is a longer piece of text that contains multiple sentences and should definitely be recognized as valid English content.", Sensitivity::Medium));
    }

    #[test]
    fn test_numbers_only() {
        assert!(is_gibberish("12345 67890", Sensitivity::Medium));
    }

    #[test]
    fn test_empty_string() {
        assert!(is_gibberish("", Sensitivity::Medium));
    }

    #[test]
    fn test_non_english_chars() {
        assert!(is_gibberish("你好世界", Sensitivity::Medium));
    }

    #[test]
    fn test_special_chars() {
        assert!(is_gibberish("!@#$%^&*()", Sensitivity::Medium));
    }

    #[test]
    fn test_base64_like() {
        assert!(is_gibberish("MOTCk4ywLLjjEE2=", Sensitivity::Medium));
    }

    #[test]
    fn test_short_gibberish() {
        assert!(is_gibberish("4-Fc@w7MF", Sensitivity::Medium));
    }

    #[test]
    fn test_letter_substitution() {
        assert!(is_gibberish("Vszzc hvwg wg zcbu", Sensitivity::Medium));
    }

    #[test]
    fn test_single_letter() {
        assert!(is_gibberish("a", Sensitivity::Medium));
    }

    #[test]
    fn test_mixed_valid_invalid() {
        assert!(!is_gibberish("hello xkcd world", Sensitivity::Medium));
    }

    #[test]
    fn test_common_abbreviation() {
        init_logger();
        let text = "NASA FBI CIA";

        info!("==== TESTING COMMON ABBREVIATION ====");
        let is_gibberish_result = log_gibberish_analysis(text);

        let lib_result = is_gibberish(text, Sensitivity::Medium);
        if is_gibberish_result != lib_result {
            warn!(
                "WARNING: Analysis result ({}) differs from library result ({})",
                is_gibberish_result, lib_result
            );
        }

        assert!(!is_gibberish(text, Sensitivity::Medium));
    }

    #[test]
    fn test_astar_search_gibberish_2() {
        init_logger();
        let text = "h2=ReOrS9DAnED8o";

        let is_gibberish_result = log_gibberish_analysis(text);

        let lib_result = is_gibberish(text, Sensitivity::Medium);
        if is_gibberish_result != lib_result {
            warn!(
                "WARNING: Analysis result ({}) differs from library result ({})",
                is_gibberish_result, lib_result
            );
        }

        assert!(is_gibberish(text, Sensitivity::Medium));
    }

    #[test]
    fn test_astar_search_gibberish_3() {
        let text = "\"D_{qU_RIO`zxE>T";
        println!("\n==== TESTING ASTAR SEARCH GIBBERISH 3 ====");
        println!("Testing text: '{}'", text);
        assert!(is_gibberish(text, Sensitivity::Medium));
    }

    #[test]
    fn test_astar_search_gibberish_4() {
        let text = "eDVD.ER#)U:FC_*9";
        println!("\n==== TESTING ASTAR SEARCH GIBBERISH 4 ====");
        println!("Testing text: '{}'", text);
        assert!(is_gibberish(text, Sensitivity::Medium));
    }

    #[test]
    fn test_astar_search_gibberish_5() {
        let text = "ST2dUnH9RI8a=Ste";
        println!("\n==== TESTING ASTAR SEARCH GIBBERISH 5 ====");
        println!("Testing text: '{}'", text);
        assert!(is_gibberish(text, Sensitivity::Medium));
    }

    #[test]
    fn test_astar_search_gibberish_6() {
        let text = "\"qxUD_ER_I>O{`Tz";
        println!("\n==== TESTING ASTAR SEARCH GIBBERISH 6 ====");
        println!("Testing text: '{}'", text);
        assert!(is_gibberish(text, Sensitivity::Medium));
    }

    #[test]
    fn test_astar_search_gibberish_7() {
        let text = "OQ\\:RAnuxw\\]@L}E";
        println!("\n==== TESTING ASTAR SEARCH GIBBERISH 7 ====");
        println!("Testing text: '{}'", text);
        assert!(is_gibberish(text, Sensitivity::Medium));
    }

    #[test]
    fn test_astar_search_gibberish_8() {
        let text = "nURa9TH28tISdS=e";
        println!("\n==== TESTING ASTAR SEARCH GIBBERISH 8 ====");
        println!("Testing text: '{}'", text);
        assert!(is_gibberish(text, Sensitivity::Medium));
    }

    #[test]
    fn test_astar_search_gibberish_9() {
        let text = "^Y+oU)cNT1,nd\"an";
        println!("\n==== TESTING ASTAR SEARCH GIBBERISH 9 ====");
        println!("Testing text: '{}'", text);
        assert!(is_gibberish(text, Sensitivity::Medium));
    }

    #[test]
    fn test_astar_search_gibberish_10() {
        let text = "R>iE:aC39edNTtAD";
        println!("\n==== TESTING ASTAR SEARCH GIBBERISH 10 ====");
        println!("Testing text: '{}'", text);
        assert!(is_gibberish(text, Sensitivity::Medium));
    }

    #[test]
    fn test_astar_search_gibberish_11() {
        let text = "pTD\"aTU\"z`^IT>Ex";
        println!("\n==== TESTING ASTAR SEARCH GIBBERISH 11 ====");
        println!("Testing text: '{}'", text);
        assert!(is_gibberish(text, Sensitivity::Medium));
    }

    #[test]
    fn test_astar_search_gibberish_12() {
        let text = "oD8eASEetEN=S29r";
        println!("\n==== TESTING ASTAR SEARCH GIBBERISH 12 ====");
        println!("Testing text: '{}'", text);
        assert!(is_gibberish(text, Sensitivity::Medium));
    }

    #[test]
    fn test_astar_search_gibberish_13() {
        init_logger();
        let text = "and\",nT1cNU)+o^Y";

        let is_gibberish_result = log_gibberish_analysis(text);

        let lib_result = is_gibberish(text, Sensitivity::Medium);
        if is_gibberish_result != lib_result {
            warn!(
                "WARNING: Analysis result ({}) differs from library result ({})",
                is_gibberish_result, lib_result
            );
        }

        assert!(is_gibberish(text, Sensitivity::Medium));
    }

    #[test]
    fn test_astar_search_gibberish_14() {
        let text = "caNnUd)\"+,on^TY1";
        println!("\n==== TESTING ASTAR SEARCH GIBBERISH 14 ====");
        println!("Testing text: '{}'", text);
        assert!(is_gibberish(text, Sensitivity::Medium));
    }

    #[test]
    fn test_astar_search_gibberish_15() {
        let text = "RoStES3EO9:Oeer>";
        println!("\n==== TESTING ASTAR SEARCH GIBBERISH 15 ====");
        println!("Testing text: '{}'", text);
        assert!(is_gibberish(text, Sensitivity::Medium));
    }

    #[test]
    fn test_astar_search_gibberish_16() {
        let text = "b-d,ooMpeST_#2*X";
        println!("\n==== TESTING ASTAR SEARCH GIBBERISH 16 ====");
        println!("Testing text: '{}'", text);
        assert!(is_gibberish(text, Sensitivity::Medium));
    }

    #[test]
    fn test_astar_search_gibberish_17() {
        let text = "RoStES2EO89Oeer=";
        println!("\n==== TESTING ASTAR SEARCH GIBBERISH 17 ====");
        println!("Testing text: '{}'", text);
        assert!(is_gibberish(text, Sensitivity::Medium));
    }

    #[test]
    fn test_astar_search_gibberish_18() {
        let text = "#IDP`a|{ryVE`>SU";
        println!("\n==== TESTING ASTAR SEARCH GIBBERISH 18 ====");
        println!("Testing text: '{}'", text);
        assert!(is_gibberish(text, Sensitivity::Medium));
    }

    #[test]
    fn test_astar_search_gibberish_19() {
        let text = "Y*#U_Nedp2oT,ob-";
        println!("\n==== TESTING ASTAR SEARCH GIBBERISH 19 ====");
        println!("Testing text: '{}'", text);
        assert!(is_gibberish(text, Sensitivity::Medium));
    }

    #[test]
    fn test_astar_search_gibberish_20() {
        let text = "t>9RSTdneaI:S3UH";
        println!("\n==== TESTING ASTAR SEARCH GIBBERISH 20 ====");
        println!("Testing text: '{}'", text);
        assert!(is_gibberish(text, Sensitivity::Medium));
    }

    #[test]
    fn test_astar_search_gibberish_21() {
        let text = "aRSUHdSI=te892nT";
        println!("\n==== TESTING ASTAR SEARCH GIBBERISH 21 ====");
        println!("Testing text: '{}'", text);
        assert!(is_gibberish(text, Sensitivity::Medium));
    }

    #[test]
    fn test_astar_search_gibberish_22() {
        let text = "cNU)+o^Yand\",nT1";
        println!("\n==== TESTING ASTAR SEARCH GIBBERISH 22 ====");
        println!("Testing text: '{}'", text);
        assert!(is_gibberish(text, Sensitivity::Medium));
    }

    #[test]
    fn test_astar_search_gibberish_23() {
        let text = "2To-#oYp*UNdeb_,";
        println!("\n==== TESTING ASTAR SEARCH GIBBERISH 23 ====");
        println!("Testing text: '{}'", text);
        assert!(is_gibberish(text, Sensitivity::Medium));
    }

    #[test]
    fn test_astar_search_gibberish_24() {
        let text = "R=tE9aN28eoNTeAO";
        println!("\n==== TESTING ASTAR SEARCH GIBBERISH 24 ====");
        println!("Testing text: '{}'", text);
        assert!(is_gibberish(text, Sensitivity::Medium));
    }

    #[test]
    fn test_astar_search_gibberish_25() {
        let text = "9DAnED8oh2=ReOrS";
        println!("\n==== TESTING ASTAR SEARCH GIBBERISH 25 ====");
        println!("Testing text: '{}'", text);
        assert!(is_gibberish(text, Sensitivity::Medium));
    }

    #[test]
    fn test_astar_search_gibberish_26() {
        let text = "=e9O2ESRotSE8erO";
        println!("\n==== TESTING ASTAR SEARCH GIBBERISH 26 ====");
        println!("Testing text: '{}'", text);
        assert!(is_gibberish(text, Sensitivity::Medium));
    }

    #[test]
    fn test_astar_search_gibberish_27() {
        let text = "o9DEnAD:SrOeR>3h";
        println!("\n==== TESTING ASTAR SEARCH GIBBERISH 27 ====");
        println!("Testing text: '{}'", text);
        assert!(is_gibberish(text, Sensitivity::Medium));
    }

    #[test]
    fn test_astar_search_gibberish_28() {
        let text = "z`^pTIEDT>\"aTx\"U";
        println!("\n==== TESTING ASTAR SEARCH GIBBERISH 28 ====");
        println!("Testing text: '{}'", text);
        assert!(is_gibberish(text, Sensitivity::Medium));
    }

    #[test]
    fn test_astar_search_gibberish_29() {
        let text = "2I'HicHd8a=Z-.;>";
        println!("\n==== TESTING ASTAR SEARCH GIBBERISH 29 ====");
        println!("Testing text: '{}'", text);
        assert!(is_gibberish(text, Sensitivity::Medium));
    }

    #[test]
    fn test_astar_search_gibberish_30() {
        let text = "Ia>`#{`|PyUrDESV";
        println!("\n==== TESTING ASTAR SEARCH GIBBERISH 30 ====");
        println!("Testing text: '{}'", text);
        assert!(is_gibberish(text, Sensitivity::Medium));
    }

    #[test]
    fn test_astar_search_gibberish_31() {
        init_logger();
        let text = "et";

        let is_gibberish_result = log_gibberish_analysis(text);

        let lib_result = is_gibberish(text, Sensitivity::Medium);
        if is_gibberish_result != lib_result {
            warn!(
                "WARNING: Analysis result ({}) differs from library result ({})",
                is_gibberish_result, lib_result
            );
        }

        assert!(is_gibberish(text, Sensitivity::Medium));
    }

    #[test]
    fn test_astar_search_gibberish_32() {
        init_logger();
        let text = "A";

        let is_gibberish_result = log_gibberish_analysis(text);

        let lib_result = is_gibberish(text, Sensitivity::Medium);
        if is_gibberish_result != lib_result {
            warn!(
                "WARNING: Analysis result ({}) differs from library result ({})",
                is_gibberish_result, lib_result
            );
        }

        assert!(is_gibberish(text, Sensitivity::Medium));
    }

    #[test]
    fn test_astar_search_gibberish_33() {
        let text = "B";
        println!("\n==== TESTING ASTAR SEARCH GIBBERISH 33 ====");
        println!("Testing text: '{}'", text);
        assert!(is_gibberish(text, Sensitivity::Medium));
    }

    #[test]
    fn test_astar_search_gibberish_34() {
        let text = "RoStES2EO89Oeer=";
        println!("\n==== TESTING ASTAR SEARCH GIBBERISH 34 ====");
        println!("Testing text: '{}'", text);
        assert!(is_gibberish(text, Sensitivity::Medium));
    }

    #[test]
    fn test_astar_search_gibberish_35() {
        let text = "RoStES2EO89Oeer=";
        println!("\n==== TESTING ASTAR SEARCH GIBBERISH 35 ====");
        println!("Testing text: '{}'", text);
        assert!(is_gibberish(text, Sensitivity::Medium));
    }

    #[test]
    fn test_astar_search_gibberish_36() {
        let text = "et";
        println!("\n==== TESTING ASTAR SEARCH GIBBERISH 36 ====");
        println!("Testing text: '{}'", text);
        assert!(is_gibberish(text, Sensitivity::Medium));
    }

    #[test]
    fn test_astar_search_gibberish_37() {
        test_with_sensitivities(
            "Aastar search algorithm is a path finding algorithm",
            false,
            false,
            false,
        );
    }

    #[test]
    fn test_cyrillic_gibberish() {
        test_with_sensitivities("%B:;@J A8 4>35= CG3DFL\\ <G697 ?K HAI", true, true, true);
    }

    #[test]
    fn test_mixed_latin_gibberish() {
        test_with_sensitivities("xgcyzw Snh fabkqta,jedm ioopl uru v", true, true, true);
    }

    #[test]
    fn test_binary_control_chars_gibberish() {
        let binary_gibberish = "\u{1}\0\u{1}\0\0\u{1}\u{1}\u{1}\u{1}\u{1}\0\0\0\0\u{1}\u{1}\0\u{1}\0\0\0\u{1}\u{1}\0\u{1}\0\0\u{1}\u{1}\u{1}\0\u{1}\u{1}\u{1}\0\u{1}\u{1}\u{1}\u{1}\0\0\0\0\u{1}\0\0\0\0\0\u{1}\u{1}\0\u{1}\u{1}\u{1}\u{1}\u{1}\u{1}\0\0\u{1}\u{1}\0\0\u{1}\0\0\0\0\0\u{1}\u{1}\0\0\0\u{1}\0\u{1}\u{1}\0\u{1}\u{1}\0\0\u{1}\u{1}\0\0\0\0\u{1}\u{1}\u{1}\0\0\0\u{1}\u{1}\u{1}\u{1}\0\u{1}\0\u{1}\u{1}\0\u{1}\0\0\0\0\0\u{1}\u{1}\u{1}\0\0\0\u{1}\u{1}\u{1}\u{1}\0\u{1}\0\u{1}\u{1}\u{1}\0\0\0\0\u{1}\u{1}\u{1}\u{1}\0\0\u{1}\0\u{1}\u{1}\u{1}\0\u{1}\0\0\u{1}\u{1}\u{1}\u{1}\0\u{1}\0\0\u{1}\0\u{1}\u{1}\0\0\0\u{1}\0\0\0\0\0\u{1}\u{1}\0\u{1}\0\u{1}\0\u{1}\u{1}\u{1}\0\u{1}\0\u{1}\u{1}\u{1}\0\0\u{1}\0\0\u{1}\u{1}\0\0\u{1}\u{1}\u{1}\u{1}\u{1}\0\0\u{1}\0\u{1}\0\u{1}\0\0\0\0\0\u{1}\u{1}\0\u{1}\u{1}\0\u{1}\u{1}\u{1}\u{1}\u{1}\0\0\u{1}\0\u{1}\0\0\0\0\0\u{1}\u{1}\u{1}\0\u{1}\u{1}\0\u{1}\u{1}\0\u{1}\u{1}\u{1}\u{1}\u{1}\u{1}\u{1}\0\u{1}\u{1}\u{1}\0\u{1}\0\u{1}\u{1}\u{1}\0";
        test_with_sensitivities(binary_gibberish, true, true, true);
    }

    #[test]
    fn test_all_gibberish_examples_medium_sensitivity() {
        assert!(is_gibberish(
            "%B:;@J A8 4>35= CG3DFL\\ <G697 ?K HAI",
            Sensitivity::Medium
        ));
        assert!(is_gibberish(
            "xgcyzw Snh fabkqta,jedm ioopl uru v",
            Sensitivity::Medium
        ));

        let binary_gibberish = "\u{1}\0\u{1}\0\0\u{1}\u{1}\u{1}\u{1}\u{1}\0\0\0\0\u{1}\u{1}\0\u{1}\0\0\0\u{1}\u{1}\0\u{1}\0\0\u{1}\u{1}\u{1}\0\u{1}\u{1}\u{1}\0\u{1}\u{1}\u{1}\u{1}\0\0\0\0\u{1}\0\0\0\0\0\u{1}\u{1}\0\u{1}\u{1}\u{1}\u{1}\u{1}\u{1}\0\0\u{1}\u{1}\0\0\u{1}\0\0\0\0\0\u{1}\u{1}\0\0\0\u{1}\0\u{1}\u{1}\0\u{1}\u{1}\0\0\u{1}\u{1}\0\0\0\0\u{1}\u{1}\u{1}\0\0\0\u{1}\u{1}\u{1}\u{1}\0\u{1}\0\u{1}\u{1}\0\u{1}\0\0\0\0\0\u{1}\u{1}\u{1}\0\0\0\u{1}\u{1}\u{1}\u{1}\0\u{1}\0\u{1}\u{1}\u{1}\0\0\0\0\u{1}\u{1}\u{1}\u{1}\0\0\u{1}\0\u{1}\u{1}\u{1}\0\u{1}\0\0\u{1}\u{1}\u{1}\u{1}\0\u{1}\0\0\u{1}\0\u{1}\u{1}\0\0\0\u{1}\0\0\0\0\0\u{1}\u{1}\0\u{1}\0\u{1}\0\u{1}\u{1}\u{1}\0\u{1}\0\u{1}\u{1}\u{1}\0\0\u{1}\0\0\u{1}\u{1}\0\0\u{1}\u{1}\u{1}\u{1}\u{1}\0\0\u{1}\0\u{1}\0\u{1}\0\0\0\0\0\u{1}\u{1}\0\u{1}\u{1}\0\u{1}\u{1}\u{1}\u{1}\u{1}\0\0\u{1}\0\u{1}\0\0\0\0\0\u{1}\u{1}\u{1}\0\u{1}\u{1}\0\u{1}\u{1}\0\u{1}\u{1}\u{1}\u{1}\u{1}\u{1}\u{1}\0\u{1}\u{1}\u{1}\0\u{1}\0\u{1}\u{1}\u{1}\0";
        assert!(is_gibberish(binary_gibberish, Sensitivity::Medium));
    }

    #[test]
    fn test_gibberish_string_1() {
        init_logger();
        let text = "ant nehoteeh ntaoe seen e tohetael";
        debug!("Testing gibberish string 1: '{}'", text);

        let _is_gibberish_result = log_gibberish_analysis(text);

        assert!(is_gibberish(text, Sensitivity::Low));
    }

    #[test]
    fn test_gibberish_string_2() {
        init_logger();
        let text = "eoa nte neeseateh tot ne lhoteenah";
        debug!("Testing gibberish string 2: '{}'", text);
        assert!(is_gibberish(text, Sensitivity::Low));
    }

    #[test]
    fn test_gibberish_string_3() {
        init_logger();
        let text = "nte neeseateh tot ne lhoteenahaoe";
        debug!("Testing gibberish string 3: '{}'", text);
        assert!(is_gibberish(text, Sensitivity::Low));
    }

    #[test]
    fn test_gibberish_string_4() {
        init_logger();
        let text = "alehestnnhton o ee tee a eatohteen";
        debug!("Testing gibberish string 4: '{}'", text);
        assert!(is_gibberish(text, Sensitivity::Low));
    }

    #[test]
    fn test_gibberish_string_5() {
        init_logger();
        let text = "h eee lee ahetes n ntoatohene nttoa";
        debug!("Testing gibberish string 5: '{}'", text);
        assert!(is_gibberish(text, Sensitivity::Low));
    }

    #[test]
    fn test_gibberish_string_6() {
        init_logger();
        let text = "ana leeoehanteees t hot eenohet tn";
        debug!("Testing gibberish string 6: '{}'", text);
        assert!(is_gibberish(text, Sensitivity::Low));
    }

    #[test]
    fn test_gibberish_string_7() {
        init_logger();
        let text = "eoahaneetohl en tot hetaeseen etn";
        debug!("Testing gibberish string 7: '{}'", text);
        assert!(is_gibberish(text, Sensitivity::Low));
    }
1426 }
1427}