1use colored::*;
22use once_cell::sync::Lazy;
23use rand::Rng;
24use std::collections::HashMap;
25use std::sync::Mutex;
26
/// Candidate characters for the `noise` transform; one is picked uniformly
/// at random and appended to the token.
const NOISE_CHARS: [char; 7] = ['*', '+', '~', '@', '#', '$', '%'];
28
29static SYNONYM_MAP: Lazy<HashMap<&'static str, &'static str>> = Lazy::new(|| {
30 let mut m = HashMap::new();
31 m.insert("good", "great");
33 m.insert("bad", "poor");
34 m.insert("fast", "quick");
35 m.insert("slow", "gradual");
36 m.insert("big", "large");
37 m.insert("small", "tiny");
38 m.insert("happy", "glad");
39 m.insert("sad", "unhappy");
40 m.insert("smart", "clever");
41 m.insert("old", "aged");
42 m.insert("new", "fresh");
43 m.insert("hot", "warm");
44 m.insert("cold", "cool");
45 m.insert("hard", "tough");
46 m.insert("easy", "simple");
47 m.insert("start", "begin");
48 m.insert("end", "finish");
49 m.insert("make", "create");
50 m.insert("get", "obtain");
51 m.insert("use", "employ");
52 m.insert("say", "state");
53 m.insert("go", "proceed");
54 m.insert("see", "observe");
55 m.insert("know", "understand");
56 m.insert("think", "believe");
57 m.insert("come", "arrive");
58 m.insert("take", "acquire");
59 m.insert("give", "provide");
60 m.insert("find", "locate");
61 m.insert("tell", "inform");
62 m.insert("bright", "vivid");
64 m.insert("dark", "dim");
65 m.insert("clean", "pure");
66 m.insert("dirty", "grimy");
67 m.insert("strong", "powerful");
68 m.insert("weak", "frail");
69 m.insert("rich", "wealthy");
70 m.insert("young", "youthful");
71 m.insert("pretty", "beautiful");
72 m.insert("ugly", "hideous");
73 m.insert("loud", "noisy");
74 m.insert("quiet", "silent");
75 m.insert("angry", "furious");
76 m.insert("calm", "serene");
77 m.insert("brave", "courageous");
78 m.insert("scared", "frightened");
79 m.insert("funny", "amusing");
80 m.insert("serious", "solemn");
81 m.insert("kind", "gentle");
82 m.insert("cruel", "harsh");
83 m.insert("empty", "hollow");
84 m.insert("full", "packed");
85 m.insert("rough", "coarse");
86 m.insert("smooth", "sleek");
87 m.insert("sharp", "keen");
88 m.insert("dull", "blunt");
89 m.insert("deep", "profound");
90 m.insert("shallow", "superficial");
91 m.insert("wide", "broad");
92 m.insert("narrow", "slim");
93 m.insert("long", "lengthy");
94 m.insert("short", "brief");
95 m.insert("heavy", "weighty");
96 m.insert("light", "featherweight");
97 m.insert("warm", "heated");
98 m.insert("frozen", "icy");
99 m.insert("luminous", "bright");
100 m.insert("gloomy", "dreary");
101 m.insert("lively", "energetic");
102 m.insert("tired", "weary");
103 m.insert("healthy", "robust");
104 m.insert("sick", "ill");
105 m.insert("safe", "secure");
106 m.insert("dangerous", "hazardous");
107 m.insert("important", "crucial");
108 m.insert("trivial", "minor");
109 m.insert("simple", "plain");
110 m.insert("complex", "intricate");
111 m.insert("rare", "scarce");
112 m.insert("common", "ordinary");
113 m.insert("strange", "peculiar");
114 m.insert("normal", "typical");
115 m.insert("ancient", "archaic");
116 m.insert("modern", "contemporary");
117 m.insert("local", "regional");
118 m.insert("distant", "remote");
119 m.insert("walk", "stroll");
121 m.insert("run", "sprint");
122 m.insert("eat", "consume");
123 m.insert("drink", "sip");
124 m.insert("write", "compose");
125 m.insert("read", "peruse");
126 m.insert("speak", "articulate");
127 m.insert("listen", "hear");
128 m.insert("look", "glance");
129 m.insert("touch", "feel");
130 m.insert("help", "assist");
131 m.insert("stop", "halt");
132 m.insert("try", "attempt");
133 m.insert("fail", "falter");
134 m.insert("win", "triumph");
135 m.insert("forfeit", "lose");
136 m.insert("buy", "purchase");
137 m.insert("sell", "trade");
138 m.insert("build", "construct");
139 m.insert("break", "shatter");
140 m.insert("fix", "repair");
141 m.insert("cut", "slice");
142 m.insert("push", "shove");
143 m.insert("pull", "tug");
144 m.insert("throw", "toss");
145 m.insert("catch", "grab");
146 m.insert("jump", "leap");
147 m.insert("fall", "plunge");
148 m.insert("rise", "ascend");
149 m.insert("drop", "descend");
150 m.insert("open", "unlock");
151 m.insert("close", "shut");
152 m.insert("move", "shift");
153 m.insert("stay", "remain");
154 m.insert("change", "alter");
155 m.insert("grow", "expand");
156 m.insert("shrink", "diminish");
157 m.insert("show", "display");
158 m.insert("hide", "conceal");
159 m.insert("choose", "select");
160 m.insert("allow", "permit");
161 m.insert("prevent", "hinder");
162 m.insert("need", "require");
163 m.insert("want", "desire");
164 m.insert("like", "enjoy");
165 m.insert("hate", "despise");
166 m.insert("fear", "dread");
167 m.insert("love", "adore");
168 m.insert("send", "dispatch");
169 m.insert("receive", "accept");
170 m.insert("keep", "retain");
171 m.insert("misplace", "lose");
172 m.insert("follow", "pursue");
173 m.insert("lead", "guide");
174 m.insert("wait", "linger");
175 m.insert("hurry", "rush");
176 m.insert("agree", "concur");
177 m.insert("refuse", "decline");
178 m.insert("house", "dwelling");
180 m.insert("car", "vehicle");
181 m.insert("book", "volume");
182 m.insert("friend", "companion");
183 m.insert("work", "labor");
184 m.insert("time", "duration");
185 m.insert("way", "method");
186 m.insert("place", "location");
187 m.insert("thing", "object");
188 m.insert("part", "component");
189 m.insert("life", "existence");
190 m.insert("day", "period");
191 m.insert("man", "person");
192 m.insert("woman", "individual");
193 m.insert("child", "youth");
194 m.insert("world", "realm");
195 m.insert("school", "institution");
196 m.insert("country", "nation");
197 m.insert("city", "metropolis");
198 m.insert("family", "household");
199 m.insert("group", "collective");
200 m.insert("system", "framework");
201 m.insert("problem", "issue");
202 m.insert("idea", "concept");
203 m.insert("question", "inquiry");
204 m.insert("result", "outcome");
205 m.insert("road", "path");
206 m.insert("tree", "plant");
207 m.insert("water", "liquid");
208 m.insert("fire", "flame");
209 m.insert("glow", "light");
210 m.insert("sound", "noise");
211 m.insert("food", "nourishment");
212 m.insert("money", "currency");
213 m.insert("power", "strength");
214 m.insert("mind", "intellect");
215 m.insert("heart", "soul");
216 m.insert("hand", "palm");
217 m.insert("eye", "gaze");
218 m.insert("word", "term");
219 m.insert("story", "tale");
220 m.insert("truth", "fact");
221 m.insert("dream", "vision");
222 m.insert("goal", "objective");
223 m.insert("plan", "strategy");
224 m.insert("step", "stage");
225 m.insert("rule", "law");
226 m.insert("right", "privilege");
227 m.insert("choice", "option");
228 m.insert("chance", "opportunity");
229 m
230});
231
232static SYNONYM_OVERRIDES: Lazy<Mutex<HashMap<String, String>>> =
235 Lazy::new(|| Mutex::new(HashMap::new()));
236
237pub fn load_synonym_overrides(path: &str) -> Result<(), Box<dyn std::error::Error>> {
248 let content = std::fs::read_to_string(path)?;
249 let mut overrides = SYNONYM_OVERRIDES.lock().unwrap_or_else(|e| e.into_inner());
250 for (line_num, raw_line) in content.lines().enumerate() {
251 let line = raw_line.trim();
252 if line.is_empty() || line.starts_with('#') {
253 continue;
254 }
255 if let Some((k, v)) = line.split_once('\t') {
256 overrides.insert(k.trim().to_lowercase(), v.trim().to_string());
257 } else if let Some((k, v)) = line.split_once('=') {
258 overrides.insert(k.trim().to_lowercase(), v.trim().to_string());
259 } else {
260 eprintln!("[eot] synonym file line {}: parse error — {:?}", line_num + 1, line);
261 }
262 }
263 Ok(())
264}
265
266pub fn set_synonym_overrides(map: HashMap<String, String>) {
268 let mut overrides = SYNONYM_OVERRIDES.lock().unwrap_or_else(|e| e.into_inner());
269 *overrides = map;
270}
271
272fn synonym_lookup(token: &str) -> Option<String> {
274 let lower = token.to_lowercase();
275 {
276 let overrides = SYNONYM_OVERRIDES.lock().unwrap_or_else(|e| e.into_inner());
277 if let Some(v) = overrides.get(lower.as_str()) {
278 return Some(v.clone());
279 }
280 }
281 SYNONYM_MAP.get(lower.as_str()).map(|s| s.to_string())
282}
283
/// A token-level text transformation.
#[derive(Debug, Clone)]
pub enum Transform {
    /// Reverse the token's characters.
    Reverse,
    /// Convert the token to uppercase.
    Uppercase,
    /// Alternate lower/upper case per character ("mocking" case).
    Mock,
    /// Append one random character from `NOISE_CHARS`.
    Noise,
    /// Randomly apply one of: reverse, uppercase, mock, noise.
    Chaos,
    /// Shuffle the token's characters.
    Scramble,
    /// Replace the token with the empty string.
    Delete,
    /// Replace the token with a synonym when one is known; otherwise pass through.
    Synonym,
    /// Pass the token through unchanged; payload is a delay in milliseconds
    /// (the transform itself does not sleep — see `apply_with_label_rng`).
    Delay(u64),
    /// Apply the contained transforms in order.
    Chain(Vec<Transform>),
}
327
328impl Transform {
329 pub fn from_str_loose(s: &str) -> Result<Self, String> {
341 let s = if let Some(rest) = s.strip_prefix("chain:") {
343 rest
344 } else {
345 s
346 };
347 if s.contains(',') {
349 let parts: Result<Vec<Transform>, String> = s
350 .split(',')
351 .map(|part| Transform::from_str_single(part.trim()))
352 .collect();
353 let transforms = parts?;
354 if transforms.len() == 1 {
355 return transforms
357 .into_iter()
358 .next()
359 .ok_or_else(|| "internal: empty transform list".to_string());
360 }
361 return Ok(Transform::Chain(transforms));
362 }
363 Transform::from_str_single(s)
364 }
365
366 fn from_str_single(s: &str) -> Result<Self, String> {
367 let lower = s.to_lowercase();
368 if lower.starts_with("delay:") {
370 let ms: u64 = lower
371 .strip_prefix("delay:")
372 .and_then(|n| n.parse().ok())
373 .unwrap_or(100);
374 return Ok(Transform::Delay(ms));
375 }
376 match lower.as_str() {
377 "reverse" => Ok(Transform::Reverse),
378 "uppercase" => Ok(Transform::Uppercase),
379 "mock" => Ok(Transform::Mock),
380 "noise" => Ok(Transform::Noise),
381 "chaos" => Ok(Transform::Chaos),
382 "scramble" => Ok(Transform::Scramble),
383 "delete" => Ok(Transform::Delete),
384 "synonym" => Ok(Transform::Synonym),
385 "delay" => Ok(Transform::Delay(100)),
386 _ => Err(format!("Unknown transform: {}", s)),
387 }
388 }
389
390 pub fn apply_with_label_rng<R: Rng>(&self, token: &str, rng: &mut R) -> (String, String) {
395 match self {
396 Transform::Reverse => (token.chars().rev().collect(), "reverse".to_string()),
397 Transform::Uppercase => (token.to_uppercase(), "uppercase".to_string()),
398 Transform::Mock => (apply_mock(token), "mock".to_string()),
399 Transform::Noise => {
400 let noise_char = NOISE_CHARS[rng.gen_range(0..NOISE_CHARS.len())];
401 (format!("{}{}", token, noise_char), "noise".to_string())
402 }
403 Transform::Scramble => {
404 let mut chars: Vec<char> = token.chars().collect();
405 let n = chars.len();
407 for i in (1..n).rev() {
408 let j = rng.gen_range(0..=i);
409 chars.swap(i, j);
410 }
411 (chars.into_iter().collect(), "scramble".to_string())
412 }
413 Transform::Delete => (String::new(), "delete".to_string()),
414 Transform::Synonym => {
415 let result = synonym_lookup(token).unwrap_or_else(|| token.to_string());
416 (result, "synonym".to_string())
417 }
418 Transform::Delay(_) => (token.to_string(), "delay".to_string()),
419 Transform::Chaos => match rng.gen_range(0u8..4) {
420 0 => (token.chars().rev().collect(), "reverse".to_string()),
421 1 => (token.to_uppercase(), "uppercase".to_string()),
422 2 => (apply_mock(token), "mock".to_string()),
423 _ => {
424 let noise_char = NOISE_CHARS[rng.gen_range(0..NOISE_CHARS.len())];
425 (format!("{}{}", token, noise_char), "noise".to_string())
426 }
427 },
428 Transform::Chain(transforms) => {
429 let mut current = token.to_string();
430 let mut labels: Vec<String> = Vec::new();
431 for t in transforms {
432 let (next, label) = t.apply_with_label_rng(¤t, rng);
433 current = next;
434 labels.push(label);
435 }
436 (current, labels.join("+"))
437 }
438 }
439 }
440
441 pub fn apply_rng<R: Rng>(&self, token: &str, rng: &mut R) -> String {
443 self.apply_with_label_rng(token, rng).0
444 }
445
446 pub fn apply_with_label(&self, token: &str) -> (String, String) {
449 self.apply_with_label_rng(token, &mut rand::thread_rng())
450 }
451
452 pub fn apply_at_rate(&self, token: &str, rate: f64) -> String {
455 debug_assert!(rate >= 0.0 && rate <= 1.0, "rate must be in [0, 1]");
456 let _ = rate;
457 self.apply(token)
458 }
459
460 pub fn apply(&self, token: &str) -> String {
464 self.apply_with_label(token).0
465 }
466
467 pub fn apply_with_rate_check(&self, token: &str, rate: f64) -> String {
469 debug_assert!(rate >= 0.0 && rate <= 1.0, "rate must be in [0, 1]");
470 let _ = rate;
471 self.apply(token)
472 }
473}
474
/// Alternating ("mocking") case: characters at even 0-based positions are
/// lowercased, odd positions uppercased. Case mappings that expand to more
/// than one character are truncated to their first character, so the output
/// has the same char count as the input.
fn apply_mock(token: &str) -> String {
    let mut out = String::with_capacity(token.len());
    for (idx, ch) in token.chars().enumerate() {
        let mapped = if idx % 2 == 0 {
            ch.to_lowercase().next().unwrap_or(ch)
        } else {
            ch.to_uppercase().next().unwrap_or(ch)
        };
        out.push(mapped);
    }
    out
}
489
/// True for characters treated as standalone single-char tokens:
/// CJK ideograph ranges plus CJK symbols/punctuation and half/fullwidth forms.
fn is_cjk(ch: char) -> bool {
    matches!(ch,
        '\u{4E00}'..='\u{9FFF}'       // CJK Unified Ideographs
            | '\u{3400}'..='\u{4DBF}'   // Extension A
            | '\u{20000}'..='\u{2A6DF}' // Extension B
            | '\u{F900}'..='\u{FAFF}'   // Compatibility Ideographs
            | '\u{3000}'..='\u{303F}'   // CJK Symbols and Punctuation
            | '\u{FF00}'..='\u{FFEF}'   // Halfwidth and Fullwidth Forms
    )
}

/// True for punctuation that ends a word token: all ASCII punctuation plus
/// common typographic marks (dashes, ellipsis, curly/angle quotes, interpunct).
fn is_word_boundary_punct(ch: char) -> bool {
    ch.is_ascii_punctuation()
        || matches!(
            ch,
            '\u{2014}' | '\u{2013}' | '\u{2026}' | '«'
                | '»'
                | '\u{201C}' | '\u{201D}' | '\u{2018}' | '\u{2019}' | '„'
                | '‹'
                | '›'
                | '·'
        )
}

/// Splits `text` into tokens, preserving every input character:
/// runs of word characters become one token, while each whitespace char,
/// boundary-punctuation char, and CJK char becomes its own one-char token.
/// Consequently `tokenize(s).join("") == s`.
pub fn tokenize(text: &str) -> Vec<String> {
    let mut tokens = Vec::new();
    let mut current = String::new();

    for ch in text.chars() {
        if ch.is_whitespace() || is_word_boundary_punct(ch) || is_cjk(ch) {
            // Flush any pending word, then emit the boundary char itself.
            // (The original had two redundant conditionals that both pushed
            // the char unconditionally; folded into a single push.)
            if !current.is_empty() {
                tokens.push(std::mem::take(&mut current));
            }
            tokens.push(ch.to_string());
        } else {
            current.push(ch);
        }
    }

    if !current.is_empty() {
        tokens.push(current);
    }

    tokens
}
559
560pub fn calculate_token_importance_rng<R: rand::Rng>(
564 token: &str,
565 position: usize,
566 rng: &mut R,
567) -> f64 {
568 let mut importance = 0.0;
569
570 importance += (token.len() as f64 / 20.0).min(0.3);
571
572 let position_factor = if !(5..=50).contains(&position) {
573 0.3
574 } else {
575 0.1
576 };
577 importance += position_factor;
578
579 if token.chars().any(|c| c.is_uppercase()) {
580 importance += 0.2;
581 }
582
583 let important_patterns = [
584 "the",
585 "and",
586 "or",
587 "but",
588 "if",
589 "when",
590 "where",
591 "how",
592 "why",
593 "what",
594 "robot",
595 "AI",
596 "technology",
597 "system",
598 "data",
599 "algorithm",
600 "model",
601 "create",
602 "build",
603 "develop",
604 "analyze",
605 "process",
606 "generate",
607 ];
608
609 let lower_token = token.to_lowercase();
610 if important_patterns
611 .iter()
612 .any(|&pattern| lower_token.contains(pattern))
613 {
614 importance += 0.3;
615 }
616
617 if token.chars().all(|c| c.is_ascii_punctuation()) {
618 importance *= 0.1;
619 }
620
621 importance += rng.gen_range(-0.1..0.1);
622
623 importance.clamp(0.0, 1.0)
624}
625
626pub fn calculate_token_importance(token: &str, position: usize) -> f64 {
631 calculate_token_importance_rng(token, position, &mut rand::thread_rng())
632}
633
634pub fn apply_heatmap_color(token: &str, importance: f64) -> String {
636 match importance {
637 i if i >= 0.8 => token.on_bright_red().bright_white().to_string(),
638 i if i >= 0.6 => token.on_red().bright_white().to_string(),
639 i if i >= 0.4 => token.on_yellow().black().to_string(),
640 i if i >= 0.2 => token.on_blue().bright_white().to_string(),
641 _ => token.normal().to_string(),
642 }
643}
644
645#[cfg(test)]
646mod tests {
647 use super::*;
648
649 #[test]
652 fn test_transform_reverse() {
653 assert_eq!(Transform::Reverse.apply("hello"), "olleh");
654 assert_eq!(Transform::Reverse.apply("world"), "dlrow");
655 }
656
657 #[test]
658 fn test_transform_uppercase() {
659 assert_eq!(Transform::Uppercase.apply("hello"), "HELLO");
660 assert_eq!(Transform::Uppercase.apply("world"), "WORLD");
661 }
662
663 #[test]
664 fn test_transform_mock() {
665 assert_eq!(Transform::Mock.apply("hello"), "hElLo");
666 assert_eq!(Transform::Mock.apply("world"), "wOrLd");
667 }
668
669 #[test]
670 fn test_transform_noise() {
671 let result = Transform::Noise.apply("hello");
672 assert!(result.starts_with("hello"));
673 assert!(result.len() > 5);
674 }
675
676 #[test]
677 fn test_transform_from_str_valid() {
678 assert!(matches!(
679 Transform::from_str_loose("reverse"),
680 Ok(Transform::Reverse)
681 ));
682 assert!(matches!(
683 Transform::from_str_loose("uppercase"),
684 Ok(Transform::Uppercase)
685 ));
686 assert!(matches!(
687 Transform::from_str_loose("mock"),
688 Ok(Transform::Mock)
689 ));
690 assert!(matches!(
691 Transform::from_str_loose("noise"),
692 Ok(Transform::Noise)
693 ));
694 }
695
696 #[test]
697 fn test_transform_from_str_invalid() {
698 assert!(Transform::from_str_loose("invalid").is_err());
699 assert!(Transform::from_str_loose("").is_err());
700 assert!(Transform::from_str_loose("foo").is_err());
701 assert!(Transform::from_str_loose("REVERSED").is_err());
702 }
703
704 #[test]
705 fn test_transform_from_str_case_insensitive() {
706 assert!(matches!(
707 Transform::from_str_loose("REVERSE"),
708 Ok(Transform::Reverse)
709 ));
710 assert!(matches!(
711 Transform::from_str_loose("Uppercase"),
712 Ok(Transform::Uppercase)
713 ));
714 assert!(matches!(
715 Transform::from_str_loose("MoCk"),
716 Ok(Transform::Mock)
717 ));
718 }
719
720 #[test]
721 fn test_transform_empty_inputs() {
722 assert_eq!(Transform::Reverse.apply(""), "");
723 assert_eq!(Transform::Uppercase.apply(""), "");
724 assert_eq!(Transform::Mock.apply(""), "");
725 assert_eq!(Transform::Noise.apply("").len(), 1);
726 }
727
728 #[test]
729 fn test_transform_single_char() {
730 assert_eq!(Transform::Reverse.apply("a"), "a");
731 assert_eq!(Transform::Mock.apply("a"), "a");
732 assert_eq!(Transform::Mock.apply("A"), "a");
733 }
734
735 #[test]
736 fn test_transform_mock_two_chars() {
737 assert_eq!(Transform::Mock.apply("ab"), "aB");
738 assert_eq!(Transform::Mock.apply("AB"), "aB");
739 }
740
741 #[test]
742 fn test_transform_preserves_length() {
743 let inputs = ["hello", "a", "ab", "abcdefghij", ""];
744 for input in &inputs {
745 assert_eq!(Transform::Reverse.apply(input).len(), input.len());
746 assert_eq!(Transform::Uppercase.apply(input).len(), input.len());
747 assert_eq!(Transform::Mock.apply(input).len(), input.len());
748 }
749 }
750
751 #[test]
752 fn test_transform_noise_appends_one_char() {
753 for _ in 0..20 {
754 let result = Transform::Noise.apply("test");
755 assert_eq!(result.len(), 5);
756 assert!(result.starts_with("test"));
757 }
758 }
759
760 #[test]
761 fn test_transform_noise_char_from_set() {
762 let noise_set = ['*', '+', '~', '@', '#', '$', '%'];
763 for _ in 0..50 {
764 let result = Transform::Noise.apply("x");
765 let noise_char = result.chars().last().expect("should have noise char");
766 assert!(
767 noise_set.contains(&noise_char),
768 "unexpected: {}",
769 noise_char
770 );
771 }
772 }
773
774 #[test]
775 fn test_reverse_is_involution() {
776 let token = "hello";
777 assert_eq!(
778 Transform::Reverse.apply(&Transform::Reverse.apply(token)),
779 token
780 );
781 }
782
783 #[test]
784 fn test_uppercase_is_idempotent() {
785 let once = Transform::Uppercase.apply("hello");
786 assert_eq!(Transform::Uppercase.apply(&once), once);
787 }
788
789 #[test]
790 fn test_noise_length_always_plus_one() {
791 for token in &["a", "hello", "test123", ""] {
792 assert_eq!(Transform::Noise.apply(token).len(), token.len() + 1);
793 }
794 }
795
796 #[test]
797 fn test_all_transforms_produce_different_results() {
798 let results: Vec<String> = [Transform::Reverse, Transform::Uppercase, Transform::Mock]
799 .iter()
800 .map(|t| t.apply("hello"))
801 .collect();
802 assert_ne!(results[0], results[1]);
803 assert_ne!(results[1], results[2]);
804 assert_ne!(results[0], results[2]);
805 }
806
807 #[test]
808 fn test_uppercase_already_upper() {
809 assert_eq!(Transform::Uppercase.apply("HELLO"), "HELLO");
810 }
811
812 #[test]
813 fn test_uppercase_with_numbers() {
814 assert_eq!(Transform::Uppercase.apply("test123"), "TEST123");
815 }
816
817 #[test]
818 fn test_reverse_with_numbers() {
819 assert_eq!(Transform::Reverse.apply("abc123"), "321cba");
820 }
821
822 #[test]
823 fn test_mock_longer_string() {
824 assert_eq!(Transform::Mock.apply("abcdef"), "aBcDeF");
825 }
826
827 #[test]
830 fn test_tokenize_simple_sentence() {
831 let tokens = tokenize("hello world");
832 assert!(tokens.contains(&"hello".to_string()));
833 assert!(tokens.contains(&"world".to_string()));
834 }
835
836 #[test]
837 fn test_tokenize_with_punctuation() {
838 let tokens = tokenize("hello, world!");
839 assert!(tokens.contains(&"hello".to_string()));
840 assert!(tokens.contains(&",".to_string()));
841 assert!(tokens.contains(&"world".to_string()));
842 assert!(tokens.contains(&"!".to_string()));
843 }
844
845 #[test]
846 fn test_tokenize_empty() {
847 assert!(tokenize("").is_empty());
848 }
849
850 #[test]
851 fn test_tokenize_single_word() {
852 assert_eq!(tokenize("hello"), vec!["hello"]);
853 }
854
855 #[test]
856 fn test_tokenize_only_whitespace() {
857 assert!(tokenize(" ").iter().all(|t| t.trim().is_empty()));
858 }
859
860 #[test]
861 fn test_tokenize_only_punctuation() {
862 assert_eq!(tokenize("..."), vec![".", ".", "."]);
863 }
864
865 #[test]
866 fn test_tokenize_mixed() {
867 let tokens = tokenize("hello,world");
868 assert_eq!(tokens, vec!["hello", ",", "world"]);
869 }
870
871 #[test]
872 fn test_tokenize_preserves_all_chars() {
873 let input = "hello, world! foo";
874 assert_eq!(tokenize(input).join(""), input);
875 }
876
877 #[test]
878 fn test_tokenize_multiple_spaces() {
879 let tokens = tokenize("a b");
880 assert!(tokens.contains(&"a".to_string()));
881 assert!(tokens.contains(&"b".to_string()));
882 }
883
884 #[test]
885 fn test_tokenize_leading_trailing_space() {
886 assert!(tokenize(" hello ").iter().any(|t| t == "hello"));
887 }
888
889 #[test]
890 fn test_tokenize_numbers() {
891 let tokens = tokenize("42 is the answer");
892 assert!(tokens.contains(&"42".to_string()));
893 }
894
895 #[test]
898 fn test_importance_clamped() {
899 for pos in 0..100 {
900 let imp = calculate_token_importance("test", pos);
901 assert!(imp >= 0.0 && imp <= 1.0);
902 }
903 }
904
905 #[test]
906 fn test_punctuation_low_importance() {
907 let mut total = 0.0;
908 for _ in 0..100 {
909 total += calculate_token_importance(".", 25);
910 }
911 assert!(total / 100.0 < 0.3);
912 }
913
914 #[test]
915 fn test_importance_early_position_boost() {
916 let early: f64 = (0..5)
917 .map(|p| calculate_token_importance("word", p))
918 .sum::<f64>()
919 / 5.0;
920 let mid: f64 = (10..15)
921 .map(|p| calculate_token_importance("word", p))
922 .sum::<f64>()
923 / 5.0;
924 assert!(early > mid - 0.2);
925 }
926
927 #[test]
928 fn test_importance_uppercase_boost() {
929 let n = 200;
930 let upper: f64 = (0..n)
931 .map(|_| calculate_token_importance("AI", 25))
932 .sum::<f64>()
933 / n as f64;
934 let lower: f64 = (0..n)
935 .map(|_| calculate_token_importance("ai", 25))
936 .sum::<f64>()
937 / n as f64;
938 assert!(upper > lower);
939 }
940
941 #[test]
942 fn test_importance_keyword_boost() {
943 let n = 200;
944 let kw: f64 = (0..n)
945 .map(|_| calculate_token_importance("algorithm", 25))
946 .sum::<f64>()
947 / n as f64;
948 let plain: f64 = (0..n)
949 .map(|_| calculate_token_importance("xyz", 25))
950 .sum::<f64>()
951 / n as f64;
952 assert!(kw > plain);
953 }
954
955 #[test]
956 fn test_importance_long_token_boost() {
957 let n = 200;
958 let long: f64 = (0..n)
959 .map(|_| calculate_token_importance("supercalifragilistic", 25))
960 .sum::<f64>()
961 / n as f64;
962 let short: f64 = (0..n)
963 .map(|_| calculate_token_importance("a", 25))
964 .sum::<f64>()
965 / n as f64;
966 assert!(long > short);
967 }
968
969 #[test]
970 fn test_importance_all_tokens_in_range() {
971 let tokens = [
972 ".",
973 ",",
974 "!",
975 "?",
976 "a",
977 "AI",
978 "algorithm",
979 "the",
980 "superlongtoken",
981 ];
982 for token in &tokens {
983 for pos in [0, 1, 5, 25, 50, 100] {
984 let imp = calculate_token_importance(token, pos);
985 assert!(imp >= 0.0 && imp <= 1.0);
986 }
987 }
988 }
989
990 #[test]
993 fn test_heatmap_color_nonempty() {
994 for level in [0.0, 0.1, 0.2, 0.4, 0.6, 0.8, 1.0] {
995 assert!(!apply_heatmap_color("test", level).is_empty());
996 }
997 }
998
999 #[test]
1000 fn test_heatmap_color_contains_text() {
1001 assert!(apply_heatmap_color("mytoken", 0.5).contains("mytoken"));
1002 }
1003
1004 #[test]
1007 fn test_transform_chaos_from_str() {
1008 assert!(matches!(
1009 Transform::from_str_loose("chaos"),
1010 Ok(Transform::Chaos)
1011 ));
1012 }
1013
1014 #[test]
1015 fn test_transform_chaos_from_str_case_insensitive() {
1016 assert!(matches!(
1017 Transform::from_str_loose("CHAOS"),
1018 Ok(Transform::Chaos)
1019 ));
1020 assert!(matches!(
1021 Transform::from_str_loose("Chaos"),
1022 Ok(Transform::Chaos)
1023 ));
1024 }
1025
1026 #[test]
1027 fn test_transform_chaos_apply_nonempty() {
1028 for _ in 0..20 {
1029 let result = Transform::Chaos.apply("hello");
1030 assert!(!result.is_empty());
1031 }
1032 }
1033
1034 #[test]
1035 fn test_transform_chaos_apply_with_label_returns_known_label() {
1036 let known = ["reverse", "uppercase", "mock", "noise"];
1037 for _ in 0..50 {
1038 let (_text, label) = Transform::Chaos.apply_with_label("hello");
1039 assert!(
1040 known.contains(&label.as_str()),
1041 "unexpected label: {}",
1042 label
1043 );
1044 }
1045 }
1046
1047 #[test]
1048 fn test_transform_chaos_apply_with_label_text_nonempty() {
1049 for _ in 0..20 {
1050 let (text, _label) = Transform::Chaos.apply_with_label("world");
1051 assert!(!text.is_empty());
1052 }
1053 }
1054
1055 #[test]
1056 fn test_transform_chaos_empty_input() {
1057 let (_text, label) = Transform::Chaos.apply_with_label("");
1059 let known = ["reverse", "uppercase", "mock", "noise"];
1060 assert!(known.contains(&label.as_str()));
1061 }
1062
1063 #[test]
1064 fn test_apply_with_label_non_chaos_label_matches_name() {
1065 assert_eq!(Transform::Reverse.apply_with_label("hi").1, "reverse");
1066 assert_eq!(Transform::Uppercase.apply_with_label("hi").1, "uppercase");
1067 assert_eq!(Transform::Mock.apply_with_label("hi").1, "mock");
1068 assert_eq!(Transform::Noise.apply_with_label("hi").1, "noise");
1069 }
1070
1071 #[test]
1072 fn test_apply_with_label_text_matches_apply() {
1073 let inputs = ["hello", "world", "test", ""];
1074 for input in &inputs {
1075 assert_eq!(
1077 Transform::Reverse.apply_with_label(input).0,
1078 Transform::Reverse.apply(input)
1079 );
1080 assert_eq!(
1081 Transform::Uppercase.apply_with_label(input).0,
1082 Transform::Uppercase.apply(input)
1083 );
1084 assert_eq!(
1085 Transform::Mock.apply_with_label(input).0,
1086 Transform::Mock.apply(input)
1087 );
1088 }
1089 }
1090
1091 #[test]
1092 fn test_transform_chaos_produces_variety_over_many_calls() {
1093 let mut results: std::collections::HashSet<String> = std::collections::HashSet::new();
1095 for _ in 0..100 {
1096 results.insert(Transform::Chaos.apply("hello"));
1097 }
1098 assert!(results.len() >= 2, "Chaos should produce varied results");
1099 }
1100
1101 #[test]
1102 fn test_transform_scramble_same_chars() {
1103 let input = "hello";
1104 for _ in 0..20 {
1105 let result = Transform::Scramble.apply(input);
1106 let mut orig_sorted: Vec<char> = input.chars().collect();
1107 let mut res_sorted: Vec<char> = result.chars().collect();
1108 orig_sorted.sort();
1109 res_sorted.sort();
1110 assert_eq!(
1111 orig_sorted, res_sorted,
1112 "Scramble should produce same chars"
1113 );
1114 }
1115 }
1116
1117 #[test]
1118 fn test_transform_scramble_label() {
1119 let (_, label) = Transform::Scramble.apply_with_label("hi");
1120 assert_eq!(label, "scramble");
1121 }
1122
1123 #[test]
1124 fn test_transform_delete_empty() {
1125 assert_eq!(Transform::Delete.apply("hello"), "");
1126 assert_eq!(Transform::Delete.apply(""), "");
1127 }
1128
1129 #[test]
1130 fn test_transform_delete_label() {
1131 let (text, label) = Transform::Delete.apply_with_label("foo");
1132 assert_eq!(text, "");
1133 assert_eq!(label, "delete");
1134 }
1135
1136 #[test]
1137 fn test_transform_synonym_known() {
1138 assert_eq!(Transform::Synonym.apply("good"), "great");
1139 assert_eq!(Transform::Synonym.apply("bad"), "poor");
1140 assert_eq!(Transform::Synonym.apply("fast"), "quick");
1141 }
1142
1143 #[test]
1144 fn test_transform_synonym_unknown_passthrough() {
1145 assert_eq!(Transform::Synonym.apply("xyzzy"), "xyzzy");
1146 }
1147
1148 #[test]
1149 fn test_transform_synonym_label() {
1150 let (_, label) = Transform::Synonym.apply_with_label("good");
1151 assert_eq!(label, "synonym");
1152 }
1153
1154 #[test]
1155 fn test_transform_from_str_delay_colon() {
1156 assert!(matches!(
1157 Transform::from_str_loose("delay:200"),
1158 Ok(Transform::Delay(200))
1159 ));
1160 }
1161
1162 #[test]
1163 fn test_transform_from_str_delay_default() {
1164 assert!(matches!(
1165 Transform::from_str_loose("delay"),
1166 Ok(Transform::Delay(100))
1167 ));
1168 }
1169
1170 #[test]
1171 fn test_transform_delay_passthrough() {
1172 assert_eq!(Transform::Delay(50).apply("hello"), "hello");
1173 }
1174
1175 #[test]
1176 fn test_transform_from_str_scramble() {
1177 assert!(matches!(
1178 Transform::from_str_loose("scramble"),
1179 Ok(Transform::Scramble)
1180 ));
1181 }
1182
1183 #[test]
1184 fn test_transform_from_str_delete() {
1185 assert!(matches!(
1186 Transform::from_str_loose("delete"),
1187 Ok(Transform::Delete)
1188 ));
1189 }
1190
1191 #[test]
1192 fn test_transform_from_str_synonym() {
1193 assert!(matches!(
1194 Transform::from_str_loose("synonym"),
1195 Ok(Transform::Synonym)
1196 ));
1197 }
1198
1199 #[test]
1202 fn test_chain_reverse_uppercase() {
1203 let chain = Transform::Chain(vec![Transform::Reverse, Transform::Uppercase]);
1204 assert_eq!(chain.apply("hello"), "OLLEH");
1205 }
1206
1207 #[test]
1208 fn test_chain_mock_noise_label() {
1209 let chain = Transform::Chain(vec![Transform::Mock, Transform::Noise]);
1210 let (result, label) = chain.apply_with_label("hello");
1211 assert!(
1212 result.starts_with("hElLo"),
1213 "expected mock applied: {}",
1214 result
1215 );
1216 assert_eq!(label, "mock+noise");
1217 }
1218
1219 #[test]
1220 fn test_chain_from_str_loose_two() {
1221 let t = Transform::from_str_loose("reverse,uppercase").expect("parse ok");
1222 assert!(matches!(t, Transform::Chain(_)));
1223 assert_eq!(t.apply("hello"), "OLLEH");
1224 }
1225
1226 #[test]
1227 fn test_chain_from_str_loose_single_no_chain() {
1228 let t = Transform::from_str_loose("reverse").expect("parse ok");
1229 assert!(matches!(t, Transform::Reverse));
1230 }
1231
1232 #[test]
1233 fn test_chain_label_joined_with_plus() {
1234 let chain = Transform::Chain(vec![Transform::Reverse, Transform::Uppercase]);
1235 let (_, label) = chain.apply_with_label("hi");
1236 assert_eq!(label, "reverse+uppercase");
1237 }
1238
1239 #[test]
1242 fn test_tokenize_em_dash() {
1243 let tokens = tokenize("word\u{2014}another");
1244 assert!(tokens.contains(&"word".to_string()));
1245 assert!(tokens.contains(&"\u{2014}".to_string()));
1246 assert!(tokens.contains(&"another".to_string()));
1247 }
1248
1249 #[test]
1250 fn test_tokenize_smart_quotes() {
1251 let tokens = tokenize("\u{201C}hello\u{201D}");
1252 assert!(tokens.contains(&"\u{201C}".to_string()));
1253 assert!(tokens.contains(&"hello".to_string()));
1254 assert!(tokens.contains(&"\u{201D}".to_string()));
1255 }
1256
1257 #[test]
1258 fn test_tokenize_ellipsis_unicode() {
1259 let tokens = tokenize("wait\u{2026}done");
1260 assert!(tokens.contains(&"\u{2026}".to_string()));
1261 assert!(tokens.contains(&"wait".to_string()));
1262 assert!(tokens.contains(&"done".to_string()));
1263 }
1264
1265 #[test]
1266 fn test_tokenize_en_dash() {
1267 let tokens = tokenize("2020\u{2013}2021");
1268 assert!(tokens.contains(&"\u{2013}".to_string()));
1269 }
1270
1271 #[test]
1272 fn test_tokenize_unicode_punct_preserves_all_chars() {
1273 let input = "hello\u{2014}world";
1274 assert_eq!(tokenize(input).join(""), input);
1275 }
1276
1277 #[test]
1280 fn test_importance_rng_same_seed_same_output() {
1281 use rand::SeedableRng;
1282 let mut rng1 = rand::rngs::StdRng::seed_from_u64(42);
1283 let mut rng2 = rand::rngs::StdRng::seed_from_u64(42);
1284 let v1 = calculate_token_importance_rng("hello", 10, &mut rng1);
1285 let v2 = calculate_token_importance_rng("hello", 10, &mut rng2);
1286 assert_eq!(v1, v2, "same seed must produce same result");
1287 }
1288
1289 #[test]
1290 fn test_importance_rng_different_seeds_differ() {
1291 use rand::SeedableRng;
1292 let mut results: std::collections::HashSet<u64> = std::collections::HashSet::new();
1293 for seed in 0u64..50 {
1294 let mut rng = rand::rngs::StdRng::seed_from_u64(seed);
1295 let v = calculate_token_importance_rng("test", 10, &mut rng);
1296 results.insert(v.to_bits());
1297 }
1298 assert!(results.len() > 1, "different seeds should sometimes differ");
1299 }
1300
1301 #[test]
1302 fn test_importance_rng_in_range() {
1303 use rand::SeedableRng;
1304 let mut rng = rand::rngs::StdRng::seed_from_u64(123);
1305 let v = calculate_token_importance_rng("algorithm", 5, &mut rng);
1306 assert!(v >= 0.0 && v <= 1.0);
1307 }
1308
1309 #[test]
1312 fn test_scramble_empty_string() {
1313 assert_eq!(Transform::Scramble.apply(""), "");
1314 }
1315
1316 #[test]
1317 fn test_scramble_single_char() {
1318 for _ in 0..10 {
1319 assert_eq!(Transform::Scramble.apply("a"), "a");
1320 }
1321 }
1322
1323 #[test]
1324 fn test_scramble_preserves_chars() {
1325 let input = "hello";
1326 for _ in 0..20 {
1327 let result = Transform::Scramble.apply(input);
1328 let mut orig: Vec<char> = input.chars().collect();
1329 let mut res: Vec<char> = result.chars().collect();
1330 orig.sort();
1331 res.sort();
1332 assert_eq!(orig, res, "scramble should preserve the same characters");
1333 }
1334 }
1335
1336 #[test]
1337 fn test_scramble_produces_variety() {
1338 let mut results = std::collections::HashSet::new();
1339 for _ in 0..50 {
1340 results.insert(Transform::Scramble.apply("hello"));
1341 }
1342 assert!(
1343 results.len() >= 2,
1344 "scramble should produce different orderings"
1345 );
1346 }
1347
1348 #[test]
1349 fn test_delete_always_returns_empty() {
1350 for input in &["hello", "world", "test", "a", "abc123", ""] {
1351 assert_eq!(Transform::Delete.apply(input), "");
1352 }
1353 }
1354
1355 #[test]
1356 fn test_scramble_two_chars_both_permutations() {
1357 let mut seen = std::collections::HashSet::new();
1358 for _ in 0..200 {
1359 seen.insert(Transform::Scramble.apply("ab"));
1360 }
1361 assert!(seen.len() >= 1, "scramble of two chars should work");
1362 }
1363
    // Parameterized coverage of Transform parsing and application, driven by
    // rstest so each `#[case]` is reported as its own named test.
    mod param_tests {
        use super::super::Transform;
        use rstest::rstest;

        // Deterministic transforms: a fixed input maps to a fixed output.
        #[rstest]
        #[case("reverse", "olleh")]
        #[case("uppercase", "HELLO")]
        #[case("mock", "hElLo")]
        #[case("delete", "")]
        fn test_deterministic_transforms(#[case] name: &str, #[case] expected: &str) {
            let t = Transform::from_str_loose(name).expect("valid transform");
            assert_eq!(t.apply("hello"), expected, "transform={name}");
        }

        // Every supported transform name must parse, including the randomized
        // ones (noise, chaos, scramble, synonym, delay) whose output is not
        // fixed and therefore is not asserted here.
        #[rstest]
        #[case("reverse")]
        #[case("uppercase")]
        #[case("mock")]
        #[case("noise")]
        #[case("chaos")]
        #[case("scramble")]
        #[case("delete")]
        #[case("synonym")]
        #[case("delay")]
        fn test_all_transforms_parse(#[case] name: &str) {
            assert!(
                Transform::from_str_loose(name).is_ok(),
                "expected '{name}' to parse"
            );
        }

        // Parsing is case-insensitive — that is the "loose" in from_str_loose.
        #[rstest]
        #[case("REVERSE")]
        #[case("Uppercase")]
        #[case("MOCK")]
        #[case("NOISE")]
        fn test_case_insensitive_parse(#[case] name: &str) {
            assert!(
                Transform::from_str_loose(name).is_ok(),
                "expected '{name}' to parse case-insensitively"
            );
        }

        // Unknown names, the empty string, and near-misses must be rejected.
        #[rstest]
        #[case("")]
        #[case("invalid")]
        #[case("REVERSED")]
        #[case("upper case")]
        fn test_invalid_transforms_error(#[case] name: &str) {
            assert!(
                Transform::from_str_loose(name).is_err(),
                "expected '{name}' to fail"
            );
        }
    }
1421
1422 #[test]
1425 fn test_chain_prefix_two_transforms() {
1426 let t = Transform::from_str_loose("chain:reverse,uppercase").unwrap();
1427 assert!(matches!(t, Transform::Chain(_)));
1428 }
1429
1430 #[test]
1431 fn test_chain_prefix_single_unwraps() {
1432 let t = Transform::from_str_loose("chain:reverse").unwrap();
1433 assert!(matches!(t, Transform::Reverse));
1434 }
1435
1436 #[test]
1437 fn test_chain_prefix_invalid_propagates_err() {
1438 assert!(Transform::from_str_loose("chain:notreal,reverse").is_err());
1439 }
1440
1441 #[test]
1442 fn test_chain_prefix_equivalent_to_comma() {
1443 let with_prefix = Transform::from_str_loose("chain:reverse,uppercase").unwrap();
1444 let without_prefix = Transform::from_str_loose("reverse,uppercase").unwrap();
1445 match (with_prefix, without_prefix) {
1447 (Transform::Chain(a), Transform::Chain(b)) => assert_eq!(a.len(), b.len()),
1448 _ => panic!("both should be Chain variants"),
1449 }
1450 }
1451
1452 #[test]
1455 fn test_tokenize_cjk_individual_chars() {
1456 let tokens = tokenize("你好");
1457 assert_eq!(tokens, vec!["你", "好"], "each CJK char should be its own token");
1458 }
1459
1460 #[test]
1461 fn test_tokenize_cjk_mixed_with_latin() {
1462 let tokens = tokenize("hello你好world");
1463 assert!(tokens.contains(&"你".to_string()));
1464 assert!(tokens.contains(&"好".to_string()));
1465 assert!(tokens.contains(&"hello".to_string()));
1466 assert!(tokens.contains(&"world".to_string()));
1467 }
1468
1469 #[test]
1470 fn test_tokenize_cjk_with_spaces() {
1471 let tokens = tokenize("你 好");
1472 assert!(tokens.contains(&"你".to_string()));
1474 assert!(tokens.contains(&"好".to_string()));
1475 }
1476
    #[test]
    fn test_synonym_overrides_all() {
        // Exercises every way synonyms can be supplied, in sequence, clearing
        // the override map between sections. Kept as a single test because the
        // overrides are process-global state and the section order matters.
        // NOTE(review): other tests that touch the override map could race
        // with this one under the parallel test runner — confirm isolation.

        // 1) Built-in table only: no overrides installed yet.
        assert_eq!(Transform::Synonym.apply("bad"), "poor");

        // 2) Programmatic overrides take effect immediately.
        set_synonym_overrides(
            vec![("good".to_string(), "fantastic".to_string())]
            .into_iter()
            .collect(),
        );
        assert_eq!(Transform::Synonym.apply("good"), "fantastic");
        set_synonym_overrides(std::collections::HashMap::new());

        // 3) TSV file loading; the '#'-prefixed line must be ignored.
        let tmp_tsv = std::env::temp_dir().join("synonyms_test_seq.tsv");
        std::fs::write(&tmp_tsv, "zephyr\tbreeze\n# comment line\n").expect("write");
        load_synonym_overrides(tmp_tsv.to_str().unwrap()).expect("load tsv");
        assert_eq!(Transform::Synonym.apply("zephyr"), "breeze");
        std::fs::remove_file(&tmp_tsv).ok();
        set_synonym_overrides(std::collections::HashMap::new());

        // 4) "key = value" file format is also accepted.
        let tmp_kv = std::env::temp_dir().join("synonyms_kv_seq.txt");
        std::fs::write(&tmp_kv, "crimson = scarlet\n").expect("write");
        load_synonym_overrides(tmp_kv.to_str().unwrap()).expect("load kv");
        assert_eq!(Transform::Synonym.apply("crimson"), "scarlet");
        std::fs::remove_file(&tmp_kv).ok();
        set_synonym_overrides(std::collections::HashMap::new());

        // 5) Built-in table must be intact after all overrides are cleared.
        assert_eq!(Transform::Synonym.apply("fast"), "quick");
    }
1514
1515 }
1517
#[cfg(test)]
mod proptests {
    use super::*;
    use proptest::prelude::*;

    proptest! {
        // Reversing twice restores the original string.
        #[test]
        fn reverse_is_involution(s in "\\PC{0,50}") {
            let (once, _) = Transform::Reverse.apply_with_label(&s);
            let (twice, _) = Transform::Reverse.apply_with_label(&once);
            prop_assert_eq!(twice, s);
        }

        // Uppercasing an already-uppercased string changes nothing.
        #[test]
        fn uppercase_is_idempotent(s in "\\PC{0,50}") {
            let (once, _) = Transform::Uppercase.apply_with_label(&s);
            let (twice, _) = Transform::Uppercase.apply_with_label(&once);
            prop_assert_eq!(twice, once);
        }

        // Noise adds exactly one character to non-empty lowercase input.
        #[test]
        fn noise_appends_one_char(s in "[a-z]{1,20}") {
            let (out, _) = Transform::Noise.apply_with_label(&s);
            prop_assert_eq!(out.chars().count(), s.chars().count() + 1);
        }

        // Delete maps every input to the empty string.
        #[test]
        fn delete_always_empty(s in "\\PC{0,50}") {
            let (out, _) = Transform::Delete.apply_with_label(&s);
            prop_assert_eq!(out.as_str(), "");
        }

        // A chain applies its stages left-to-right: uppercase, then reverse.
        #[test]
        fn chain_applies_in_order(s in "[a-z]{5,20}") {
            let stages = vec![Transform::Uppercase, Transform::Reverse];
            let (out, _) = Transform::Chain(stages).apply_with_label(&s);
            let expected: String = s.to_uppercase().chars().rev().collect();
            prop_assert_eq!(out, expected);
        }
    }
}
1571
#[cfg(test)]
mod confidence_tests {
    use super::*;

    /// Custom thresholds move the High/Mid/Low band boundaries.
    #[test]
    fn test_confidence_thresholds_custom() {
        use crate::render::{ConfidenceBand, ConfidenceThresholds};
        let t = ConfidenceThresholds { high: 0.9, mid: 0.6 };
        assert_eq!(ConfidenceBand::from_confidence_with_thresholds(0.95, &t), ConfidenceBand::High);
        assert_eq!(ConfidenceBand::from_confidence_with_thresholds(0.75, &t), ConfidenceBand::Mid);
        assert_eq!(ConfidenceBand::from_confidence_with_thresholds(0.3, &t), ConfidenceBand::Low);
    }

    /// Default thresholds stay at 0.7/0.4, and a value exactly at a
    /// threshold lands in the higher band (boundaries are inclusive).
    #[test]
    fn test_confidence_thresholds_default_unchanged() {
        use crate::render::{ConfidenceBand, ConfidenceThresholds};
        let t = ConfidenceThresholds::default();
        assert_eq!(t.high, 0.7);
        assert_eq!(t.mid, 0.4);
        assert_eq!(ConfidenceBand::from_confidence_with_thresholds(0.7, &t), ConfidenceBand::High);
        assert_eq!(ConfidenceBand::from_confidence_with_thresholds(0.4, &t), ConfidenceBand::Mid);
        assert_eq!(ConfidenceBand::from_confidence_with_thresholds(0.39, &t), ConfidenceBand::Low);
    }

    /// Reversing text containing a combining mark must not lose characters.
    /// The previous assertion only checked the output was valid UTF-8, which
    /// a `String` is by construction — it could never fail.
    #[test]
    fn test_reverse_combining_marks() {
        let input = "e\u{0301}"; // 'e' + combining acute accent
        let (result, _) = Transform::Reverse.apply_with_label(input);
        assert_eq!(
            result.chars().count(),
            input.chars().count(),
            "reverse must preserve the number of chars"
        );
        assert!(!result.is_empty(), "result must not be empty");
    }

    /// Scramble permutes characters in place, so the char count is stable.
    #[test]
    fn test_scramble_preserves_length() {
        let input = "hello world";
        let mut rng = rand::thread_rng();
        let (result, _) = Transform::Scramble.apply_with_label_rng(input, &mut rng);
        assert_eq!(result.chars().count(), input.chars().count(),
            "scramble should preserve char count");
    }

    /// Mock must not add or drop CJK characters.
    /// NOTE(review): this checks `chars()`, not true grapheme clusters; for
    /// these CJK codepoints the two coincide.
    #[test]
    fn test_mock_preserves_grapheme_count() {
        let input = "你好世界";
        let (result, _) = Transform::Mock.apply_with_label(input);
        assert_eq!(result.chars().count(), input.chars().count(),
            "mock should preserve CJK char count");
    }

    /// A chain's final output equals applying each stage manually in order.
    #[test]
    fn test_dry_run_chain_shows_steps() {
        let input = "hello world";
        // Step 1: reverse.
        let (after_reverse, _) = Transform::Reverse.apply_with_label(input);
        assert_eq!(after_reverse, "dlrow olleh");
        // Step 2: uppercase the reversed text.
        let (after_uppercase, _) = Transform::Uppercase.apply_with_label(&after_reverse);
        assert_eq!(after_uppercase, "DLROW OLLEH");
        // The chain must reproduce the manual two-step result.
        let chain = Transform::Chain(vec![Transform::Reverse, Transform::Uppercase]);
        let (chain_result, _) = chain.apply_with_label(input);
        assert_eq!(chain_result, "DLROW OLLEH");
    }

    /// Malformed lines in an overrides file are skipped, not fatal.
    /// Renamed from `test_synonym_load_error_includes_line_number`: the body
    /// asserts `is_ok`, so the old name described behavior it never checked.
    #[test]
    fn test_synonym_load_skips_malformed_lines() {
        use std::io::Write;
        let tmp = std::env::temp_dir().join("eot_synonym_test_bad.tsv");
        let mut f = std::fs::File::create(&tmp).unwrap();
        writeln!(f, "good\tgreat").unwrap();
        writeln!(f, "bad_line_no_separator").unwrap();
        writeln!(f, "fast\tquick").unwrap();
        drop(f);
        let result = load_synonym_overrides(tmp.to_str().unwrap());
        assert!(result.is_ok(), "load_synonym_overrides should not fail on bad lines");
        let _ = std::fs::remove_file(&tmp);
    }
}