1use lazy_static::lazy_static;
7use regex::Regex;
8use std::collections::{HashMap, HashSet};
9
/// One row of the Rosetta table: a symbol together with the prose phrases
/// that stand for it.
#[derive(Clone, Debug)]
pub struct RosettaEntry {
    /// Symbol text emitted when one of `patterns` is recognised.
    pub symbol: &'static str,
    /// Prose phrases that map to `symbol`.
    pub patterns: &'static [&'static str],
    /// Grouping label for the entry (for example `"quantifier"` or `"logic"`).
    pub category: &'static str,
}
17
18pub static ROSETTA: &[RosettaEntry] = &[
21 RosettaEntry {
25 symbol: "∀",
26 patterns: &["for all", "for every", "every", "all", "each", "any"],
27 category: "quantifier",
28 },
29 RosettaEntry {
30 symbol: "∃",
31 patterns: &["there exists", "exists", "some", "at least one", "there is"],
32 category: "quantifier",
33 },
34 RosettaEntry {
35 symbol: "∃!",
36 patterns: &[
37 "exists unique",
38 "exactly one",
39 "unique",
40 "one and only one",
41 "exists exactly one",
42 ],
43 category: "quantifier",
44 },
45 RosettaEntry {
46 symbol: "∄",
47 patterns: &["does not exist", "no such", "none exists"],
48 category: "quantifier",
49 },
50 RosettaEntry {
54 symbol: "∧",
55 patterns: &["and", "both", "as well as", "together with", "also"],
56 category: "logic",
57 },
58 RosettaEntry {
59 symbol: "∨",
60 patterns: &["or", "either", "alternatively", "otherwise"],
61 category: "logic",
62 },
63 RosettaEntry {
64 symbol: "¬",
65 patterns: &["not", "negation", "isn't", "is not", "doesn't", "does not"],
66 category: "logic",
67 },
68 RosettaEntry {
69 symbol: "⇒",
70 patterns: &[
71 "implies",
72 "if then",
73 "therefore",
74 "then",
75 "consequently",
76 "so",
77 "hence",
78 ],
79 category: "logic",
80 },
81 RosettaEntry {
82 symbol: "⇔",
83 patterns: &[
84 "if and only if",
85 "iff",
86 "equivalent to",
87 "is equivalent",
88 "exactly when",
89 ],
90 category: "logic",
91 },
92 RosettaEntry {
93 symbol: "→",
94 patterns: &["to", "returns", "maps to", "yields", "produces", "goes to"],
95 category: "logic",
96 },
97 RosettaEntry {
98 symbol: "↔",
99 patterns: &["bidirectional", "two-way", "both ways"],
100 category: "logic",
101 },
102 RosettaEntry {
103 symbol: "⊕",
104 patterns: &["xor", "exclusive or", "either but not both"],
105 category: "logic",
106 },
107 RosettaEntry {
111 symbol: ">",
112 patterns: &[
113 "greater than",
114 "more than",
115 "exceeds",
116 "above",
117 "larger than",
118 ],
119 category: "comparison",
120 },
121 RosettaEntry {
122 symbol: "<",
123 patterns: &["less than", "fewer than", "below", "smaller than", "under"],
124 category: "comparison",
125 },
126 RosettaEntry {
127 symbol: "≥",
128 patterns: &[
129 "greater than or equal",
130 "at least",
131 "no less than",
132 "minimum",
133 ">=",
134 ],
135 category: "comparison",
136 },
137 RosettaEntry {
138 symbol: "≤",
139 patterns: &[
140 "less than or equal",
141 "at most",
142 "no more than",
143 "maximum",
144 "<=",
145 ],
146 category: "comparison",
147 },
148 RosettaEntry {
149 symbol: "≡",
150 patterns: &[
151 "identical to",
152 "equals",
153 "is equal to",
154 "same as",
155 "equivalent",
156 "===",
157 "==",
158 ],
159 category: "comparison",
160 },
161 RosettaEntry {
162 symbol: "≢",
163 patterns: &[
164 "not identical",
165 "not equal",
166 "differs from",
167 "different from",
168 "!==",
169 "!=",
170 ],
171 category: "comparison",
172 },
173 RosettaEntry {
174 symbol: "≈",
175 patterns: &["approximately", "roughly", "about", "nearly"],
176 category: "comparison",
177 },
178 RosettaEntry {
182 symbol: "≜",
183 patterns: &[
184 "defined as",
185 "is defined as",
186 "equals by definition",
187 "is a",
188 "means",
189 "definition",
190 ],
191 category: "definition",
192 },
193 RosettaEntry {
194 symbol: "≔",
195 patterns: &["assigned", "set to", "becomes", "gets", "is assigned", ":="],
196 category: "definition",
197 },
198 RosettaEntry {
199 symbol: "↦",
200 patterns: &["mapsto", "maps to", "sends to"],
201 category: "definition",
202 },
203 RosettaEntry {
207 symbol: "λ",
208 patterns: &[
209 "lambda",
210 "function",
211 "anonymous function",
212 "fn",
213 "func",
214 "=>",
215 ],
216 category: "function",
217 },
218 RosettaEntry {
219 symbol: "∘",
220 patterns: &["compose", "composed with", "followed by"],
221 category: "function",
222 },
223 RosettaEntry {
224 symbol: "fix",
225 patterns: &["fixpoint", "recursive", "fixed point"],
226 category: "function",
227 },
228 RosettaEntry {
229 symbol: "μ",
230 patterns: &["least fixpoint", "lfp", "mu"],
231 category: "function",
232 },
233 RosettaEntry {
237 symbol: "∈",
238 patterns: &["in", "element of", "member of", "belongs to", "is in"],
239 category: "set",
240 },
241 RosettaEntry {
242 symbol: "∉",
243 patterns: &["not in", "not element of", "not member of", "outside"],
244 category: "set",
245 },
246 RosettaEntry {
247 symbol: "⊆",
248 patterns: &["subset", "subset of", "contained in", "part of"],
249 category: "set",
250 },
251 RosettaEntry {
252 symbol: "⊇",
253 patterns: &["superset", "superset of", "contains"],
254 category: "set",
255 },
256 RosettaEntry {
257 symbol: "⊂",
258 patterns: &["proper subset", "strict subset"],
259 category: "set",
260 },
261 RosettaEntry {
262 symbol: "⊃",
263 patterns: &["proper superset", "strict superset"],
264 category: "set",
265 },
266 RosettaEntry {
267 symbol: "∪",
268 patterns: &["union", "combined with", "merged with"],
269 category: "set",
270 },
271 RosettaEntry {
272 symbol: "∩",
273 patterns: &["intersection", "overlapping with", "common to", "shared by"],
274 category: "set",
275 },
276 RosettaEntry {
277 symbol: "∅",
278 patterns: &["empty", "empty set", "null", "nothing", "nil", "void"],
279 category: "set",
280 },
281 RosettaEntry {
282 symbol: "𝒫",
283 patterns: &["powerset", "power set", "all subsets"],
284 category: "set",
285 },
286 RosettaEntry {
287 symbol: "∖",
288 patterns: &["set difference", "minus", "except", "without"],
289 category: "set",
290 },
291 RosettaEntry {
292 symbol: "𝔾",
293 patterns: &["graph", "network", "structure"],
294 category: "set",
295 },
296 RosettaEntry {
300 symbol: "Δ",
301 patterns: &["delta", "difference", "change", "increment"],
302 category: "contractor",
303 },
304 RosettaEntry {
305 symbol: "Pre",
306 patterns: &["precondition", "requires", "before"],
307 category: "contractor",
308 },
309 RosettaEntry {
310 symbol: "Post",
311 patterns: &["postcondition", "ensures", "after", "guarantees"],
312 category: "contractor",
313 },
314 RosettaEntry {
315 symbol: "Inv",
316 patterns: &["invariant", "always true", "maintained"],
317 category: "contractor",
318 },
319 RosettaEntry {
323 symbol: "Ψ",
324 patterns: &["intent", "goal", "purpose", "objective"],
325 category: "intent",
326 },
327 RosettaEntry {
328 symbol: "μ",
329 patterns: &["fitness", "utility", "score", "metric"],
330 category: "intent",
331 },
332 RosettaEntry {
333 symbol: "Target",
334 patterns: &["target", "aim", "destination"],
335 category: "intent",
336 },
337 RosettaEntry {
341 symbol: "ℕ",
342 patterns: &[
343 "natural",
344 "natural number",
345 "positive integer",
346 "nat",
347 "natural numbers",
348 "unsigned",
349 ],
350 category: "type",
351 },
352 RosettaEntry {
353 symbol: "ℤ",
354 patterns: &[
355 "integer",
356 "int",
357 "whole number",
358 "integers",
359 "signed integer",
360 ],
361 category: "type",
362 },
363 RosettaEntry {
364 symbol: "ℝ",
365 patterns: &[
366 "real",
367 "real number",
368 "float",
369 "decimal",
370 "double",
371 "number",
372 ],
373 category: "type",
374 },
375 RosettaEntry {
376 symbol: "ℚ",
377 patterns: &["rational", "rational number", "fraction"],
378 category: "type",
379 },
380 RosettaEntry {
381 symbol: "𝔹",
382 patterns: &["boolean", "bool", "true or false", "binary", "flag"],
383 category: "type",
384 },
385 RosettaEntry {
386 symbol: "𝕊",
387 patterns: &["string", "str", "text", "char sequence", "varchar"],
388 category: "type",
389 },
390 RosettaEntry {
391 symbol: "ℂ",
392 patterns: &["complex", "complex number"],
393 category: "type",
394 },
395 RosettaEntry {
396 symbol: "List",
397 patterns: &["list", "array", "sequence", "vector"],
398 category: "type",
399 },
400 RosettaEntry {
401 symbol: "Maybe",
402 patterns: &["maybe", "optional", "nullable", "option"],
403 category: "type",
404 },
405 RosettaEntry {
406 symbol: "Either",
407 patterns: &["either", "result", "union type"],
408 category: "type",
409 },
410 RosettaEntry {
414 symbol: "⊤",
415 patterns: &["true", "top", "yes", "valid", "correct", "success", "ok"],
416 category: "truth",
417 },
418 RosettaEntry {
419 symbol: "⊥",
420 patterns: &[
421 "false",
422 "bottom",
423 "no",
424 "invalid",
425 "incorrect",
426 "failure",
427 "crash",
428 "error",
429 ],
430 category: "truth",
431 },
432 RosettaEntry {
436 symbol: "∎",
437 patterns: &["qed", "proven", "end of proof", "proved", "done"],
438 category: "special",
439 },
440 RosettaEntry {
441 symbol: "⊢",
442 patterns: &["proves", "entails", "derives", "turnstile", "yields"],
443 category: "special",
444 },
445 RosettaEntry {
446 symbol: "⊨",
447 patterns: &["models", "satisfies", "validates"],
448 category: "special",
449 },
450 RosettaEntry {
451 symbol: "□",
452 patterns: &["necessarily", "always", "box"],
453 category: "special",
454 },
455 RosettaEntry {
456 symbol: "◇",
457 patterns: &["possibly", "eventually", "diamond"],
458 category: "special",
459 },
460 RosettaEntry {
464 symbol: "+",
465 patterns: &["plus", "added to", "sum of", "add"],
466 category: "math",
467 },
468 RosettaEntry {
469 symbol: "−",
470 patterns: &["minus", "subtract", "subtracted from"],
471 category: "math",
472 },
473 RosettaEntry {
474 symbol: "×",
475 patterns: &["times", "multiplied by", "product of", "multiply"],
476 category: "math",
477 },
478 RosettaEntry {
479 symbol: "÷",
480 patterns: &["divided by", "over", "ratio of", "divide"],
481 category: "math",
482 },
483 RosettaEntry {
484 symbol: "²",
485 patterns: &["squared", "square of", "to the power of 2"],
486 category: "math",
487 },
488 RosettaEntry {
489 symbol: "³",
490 patterns: &["cubed", "cube of", "to the power of 3"],
491 category: "math",
492 },
493 RosettaEntry {
494 symbol: "√",
495 patterns: &["square root", "sqrt", "root of"],
496 category: "math",
497 },
498 RosettaEntry {
499 symbol: "Σ",
500 patterns: &["sum", "summation", "sigma"],
501 category: "math",
502 },
503 RosettaEntry {
504 symbol: "Π",
505 patterns: &["product", "pi", "prod"],
506 category: "math",
507 },
508 RosettaEntry {
509 symbol: "∞",
510 patterns: &["infinity", "infinite", "unbounded"],
511 category: "math",
512 },
513 RosettaEntry {
517 symbol: "⟦Ω⟧",
518 patterns: &["meta block", "metadata", "foundation"],
519 category: "block",
520 },
521 RosettaEntry {
522 symbol: "⟦Σ⟧",
523 patterns: &["types block", "type definitions", "glossary"],
524 category: "block",
525 },
526 RosettaEntry {
527 symbol: "⟦Γ⟧",
528 patterns: &["rules block", "business rules", "constraints"],
529 category: "block",
530 },
531 RosettaEntry {
532 symbol: "⟦Λ⟧",
533 patterns: &["functions block", "function definitions", "lambdas"],
534 category: "block",
535 },
536 RosettaEntry {
537 symbol: "⟦Χ⟧",
538 patterns: &["errors block", "error handling", "exceptions"],
539 category: "block",
540 },
541 RosettaEntry {
542 symbol: "⟦Ε⟧",
543 patterns: &["evidence block", "proof", "validation"],
544 category: "block",
545 },
546 RosettaEntry {
550 symbol: "⟨",
551 patterns: &["tuple start", "record start", "angle open"],
552 category: "special",
553 },
554 RosettaEntry {
555 symbol: "⟩",
556 patterns: &["tuple end", "record end", "angle close"],
557 category: "special",
558 },
559 RosettaEntry {
563 symbol: "◊⁺⁺",
564 patterns: &["platinum", "platinum tier", "optimal"],
565 category: "tier",
566 },
567 RosettaEntry {
568 symbol: "◊⁺",
569 patterns: &["gold", "gold tier", "production ready"],
570 category: "tier",
571 },
572 RosettaEntry {
573 symbol: "◊",
574 patterns: &["silver", "silver tier", "good"],
575 category: "tier",
576 },
577 RosettaEntry {
578 symbol: "◊⁻",
579 patterns: &["bronze", "bronze tier", "acceptable"],
580 category: "tier",
581 },
582 RosettaEntry {
583 symbol: "⊘",
584 patterns: &["reject", "rejected", "invalid tier"],
585 category: "tier",
586 },
587];
588
589lazy_static! {
590 pub static ref ROSETTA_SORTED: Vec<&'static RosettaEntry> = {
592 let mut entries: Vec<_> = ROSETTA.iter().collect();
593 entries.sort_by(|a, b| {
594 let max_a = a.patterns.iter().map(|p| p.len()).max().unwrap_or(0);
595 let max_b = b.patterns.iter().map(|p| p.len()).max().unwrap_or(0);
596 max_b.cmp(&max_a)
597 });
598 entries
599 };
600
601 pub static ref PATTERN_TO_SYMBOL: HashMap<String, &'static str> = {
603 let mut m = HashMap::new();
604 for entry in ROSETTA {
605 for pattern in entry.patterns {
606 m.insert(pattern.to_lowercase(), entry.symbol);
607 }
608 }
609 m
610 };
611
612 pub static ref SYMBOL_TO_PATTERN: HashMap<&'static str, &'static str> = {
614 let mut m = HashMap::new();
615 for entry in ROSETTA {
616 if let Some(first) = entry.patterns.first() {
617 m.insert(entry.symbol, *first);
618 }
619 }
620 m
621 };
622
623 pub static ref ROSETTA_COMPILED: Vec<CompiledRosettaEntry> = {
625 ROSETTA_SORTED.iter().map(|entry| {
626 let compiled_patterns = entry.patterns.iter().filter_map(|pattern| {
627 let regex_str = format!(r"(?i)\b{}\b", escape_regex(pattern));
628 Regex::new(®ex_str).ok()
629 }).collect();
630
631 CompiledRosettaEntry {
632 symbol: entry.symbol,
633 regexes: compiled_patterns,
634 }
635 }).collect()
636 };
637}
638
639pub struct CompiledRosettaEntry {
641 pub symbol: &'static str,
642 pub regexes: Vec<Regex>,
643}
644
645pub fn prose_to_symbol(pattern: &str) -> Option<&'static str> {
647 PATTERN_TO_SYMBOL
648 .get(&pattern.to_lowercase().trim().to_string())
649 .copied()
650}
651
652pub fn symbol_to_prose(symbol: &str) -> Option<&'static str> {
654 SYMBOL_TO_PATTERN.get(symbol).copied()
655}
656
657pub fn symbols_by_category(category: &str) -> Vec<&'static str> {
659 ROSETTA
660 .iter()
661 .filter(|e| e.category == category)
662 .map(|e| e.symbol)
663 .collect()
664}
665
666pub fn get_all_categories() -> Vec<&'static str> {
668 let mut categories: Vec<_> = ROSETTA.iter().map(|e| e.category).collect();
669 categories.sort();
670 categories.dedup();
671 categories
672}
673
674pub fn get_mapping_count() -> usize {
676 ROSETTA.iter().map(|e| e.patterns.len()).sum()
677}
678
/// Returns `s` with a backslash inserted before each of the characters
/// `\ . * + ? ^ $ { } ( ) | [ ]`; every other character is copied unchanged.
fn escape_regex(s: &str) -> String {
    // Worst case every character is escaped, doubling the length.
    s.chars()
        .fold(String::with_capacity(s.len() * 2), |mut escaped, ch| {
            let needs_escape = matches!(
                ch,
                '\\' | '.' | '*' | '+' | '?' | '^' | '$' | '{' | '}' | '(' | ')' | '|' | '[' | ']'
            );
            if needs_escape {
                escaped.push('\\');
            }
            escaped.push(ch);
            escaped
        })
}
693
694pub struct RosettaStone;
696
697impl RosettaStone {
698 pub fn convert(input: &str) -> (String, usize, Vec<String>) {
701 let mut result = input.to_string();
702 let mut mapped_chars = 0;
703 let _total_chars = input.len();
704
705 for entry in ROSETTA_COMPILED.iter() {
707 for regex in entry.regexes.iter() {
708 let matches: Vec<_> = regex.find_iter(&result).collect();
709 mapped_chars += matches.iter().map(|m| m.as_str().len()).sum::<usize>();
710 result = regex.replace_all(&result, entry.symbol).to_string();
711 }
712 }
713
714 result = Self::cleanup_operators(&result);
716
717 result = Self::convert_assignments(&result);
719
720 let unmapped = Self::find_unmapped_words(&result);
722
723 (result.trim().to_string(), mapped_chars, unmapped)
724 }
725
726 pub fn confidence(input_len: usize, mapped_chars: usize) -> f64 {
728 if input_len == 0 {
729 return 1.0;
730 }
731 (mapped_chars as f64 / input_len as f64).min(1.0)
732 }
733
734 fn cleanup_operators(input: &str) -> String {
736 let operators = ["≜", "≔", "⇒", "∈", "→", "⇔", "∧", "∨"];
737 let mut result = input.to_string();
738
739 for op in operators {
740 let regex_str = format!(r"\s*{}\s*", escape_regex(op));
741 if let Ok(regex) = Regex::new(®ex_str) {
742 result = regex.replace_all(&result, op).to_string();
743 }
744 }
745
746 result
747 }
748
749 fn convert_assignments(input: &str) -> String {
751 let mut result = input.to_string();
752
753 if let Ok(regex) = Regex::new(r"(?i)const\s+(\w+)\s*=\s*(\S+)") {
755 result = regex.replace_all(&result, "$1≜$2").to_string();
756 }
757
758 if let Ok(regex) = Regex::new(r"(?i)Define\s+(\w+)\s+as\s+(\S+)") {
760 result = regex.replace_all(&result, "$1≜$2").to_string();
761 }
762
763 if let Ok(regex) = Regex::new(r"(?i)let\s+(\w+)\s*=\s*(\S+)") {
765 result = regex.replace_all(&result, "$1≜$2").to_string();
766 }
767
768 result
769 }
770
771 fn find_unmapped_words(result: &str) -> Vec<String> {
773 let ignore_words = [
774 "the", "with", "that", "this", "from", "into", "when", "where", "which", "what",
775 ];
776
777 let word_regex = Regex::new(r"\b[a-zA-Z]{3,}\b").unwrap();
778 let words: Vec<_> = word_regex
779 .find_iter(result)
780 .map(|m| m.as_str().to_lowercase())
781 .collect();
782
783 let mut unique: Vec<_> = words
784 .into_iter()
785 .filter(|w| !ignore_words.contains(&w.as_str()))
786 .collect();
787
788 unique.sort();
789 unique.dedup();
790 unique
791 }
792
793 pub fn to_prose(input: &str) -> String {
796 let mut result = input.to_string();
797
798 let mut entries: Vec<_> = ROSETTA.iter().collect();
800 entries.sort_by(|a, b| b.symbol.len().cmp(&a.symbol.len()));
801
802 for entry in entries {
803 if let Some(primary) = entry.patterns.first() {
804 let replacement = format!(" {} ", primary);
806 result = result.replace(entry.symbol, &replacement);
807 }
808 }
809
810 result = Self::add_word_boundaries(&result);
813
814 Self::normalize_whitespace(&result)
816 }
817
818 fn add_word_boundaries(input: &str) -> String {
820 let camel_case = Regex::new(r"([a-z])([A-Z])").unwrap();
822 let result = camel_case.replace_all(input, "$1 $2");
823
824 let word_join = Regex::new(r"([a-zA-Z])( )(for all|exists|implies|and|or|not|if|then|else|in|defined as|identical to|true|false|lambda|function|returns|boolean|integer|string|natural|real|proves|therefore|yields)( )").unwrap();
826 let result = word_join.replace_all(&result, "$1 $3 ");
827
828 result.to_string()
829 }
830
831 fn normalize_whitespace(input: &str) -> String {
833 let multiple_spaces = Regex::new(r"\s+").unwrap();
834 let result = multiple_spaces.replace_all(input, " ");
835
836 let space_before_punct = Regex::new(r"\s+([.,;:!?])").unwrap();
838 let result = space_before_punct.replace_all(&result, "$1");
839
840 let space_after_open = Regex::new(r"([(\[{])\s+").unwrap();
842 let result = space_after_open.replace_all(&result, "$1");
843
844 let space_before_close = Regex::new(r"\s+([)\]}])").unwrap();
846 let result = space_before_close.replace_all(&result, "$1");
847
848 result.trim().to_string()
849 }
850
851 pub fn normalize_for_comparison(input: &str) -> String {
853 let lowercase = input.to_lowercase();
854 let normalized = Self::normalize_whitespace(&lowercase);
855
856 let punct_regex = Regex::new(r#"[.,;:!?"']"#).unwrap();
858 punct_regex.replace_all(&normalized, "").trim().to_string()
859 }
860
861 pub fn semantic_similarity(text1: &str, text2: &str) -> f64 {
864 let norm1 = Self::normalize_for_comparison(text1);
865 let norm2 = Self::normalize_for_comparison(text2);
866
867 let words1: HashSet<_> = norm1.split_whitespace().collect();
869 let words2: HashSet<_> = norm2.split_whitespace().collect();
870
871 if words1.is_empty() && words2.is_empty() {
872 return 1.0;
873 }
874
875 let intersection = words1.intersection(&words2).count();
877 let union = words1.union(&words2).count();
878
879 if union == 0 {
880 1.0
881 } else {
882 intersection as f64 / union as f64
883 }
884 }
885}
886
#[cfg(test)]
mod tests {
    use super::*;

    /// Converts `original` to symbols and back, then scores how many words
    /// survived the trip.
    fn round_trip_similarity(original: &str) -> f64 {
        let (symbolic, _, _) = RosettaStone::convert(original);
        let restored = RosettaStone::to_prose(&symbolic);
        RosettaStone::semantic_similarity(original, &restored)
    }

    #[test]
    fn test_prose_to_symbol() {
        let cases = [
            ("for all", Some("∀")),
            ("exists", Some("∃")),
            ("unknown", None),
        ];
        for (phrase, expected) in cases.iter() {
            assert_eq!(prose_to_symbol(phrase), *expected);
        }
    }

    #[test]
    fn test_convert_basic() {
        let (converted, _, _) = RosettaStone::convert("for all x in S");
        for symbol in ["∀", "∈"].iter() {
            assert!(converted.contains(symbol));
        }
    }

    #[test]
    fn test_convert_assignment() {
        let (converted, _, _) = RosettaStone::convert("Define x as 5");
        assert!(converted.contains("≜"));
    }

    #[test]
    fn test_mapping_count() {
        let total = get_mapping_count();
        assert!(total > 300);
    }

    #[test]
    fn test_to_prose_basic() {
        let expanded = RosettaStone::to_prose("∀x∈S");
        assert!(expanded.contains("for all"));
        assert!(expanded.contains("in"));
    }

    #[test]
    fn test_to_prose_spacing() {
        let expanded = RosettaStone::to_prose("x≜5∧y≜10");
        assert!(expanded.contains("defined as"));
        assert!(expanded.contains("and"));
    }

    #[test]
    fn test_round_trip_simple() {
        let similarity = round_trip_similarity("for all x in S");
        assert!(
            similarity > 0.5,
            "Round trip lost too much meaning: {:.2}",
            similarity
        );
    }

    #[test]
    fn test_round_trip_complex() {
        let similarity = round_trip_similarity(
            "Define x as 5 and for all y in S, if x equals y then return true",
        );
        assert!(
            similarity > 0.4,
            "Complex round trip lost meaning: {:.2}",
            similarity
        );
    }

    #[test]
    fn test_semantic_similarity() {
        // Identical texts share every word.
        let identical = RosettaStone::semantic_similarity("hello world", "hello world");
        assert_eq!(identical, 1.0);

        // One extra word out of six still leaves a high overlap.
        let close = RosettaStone::semantic_similarity("for all x in set S", "for all x in S");
        assert!(close > 0.7);

        // No shared words at all.
        let unrelated =
            RosettaStone::semantic_similarity("apple banana cherry", "dog cat bird");
        assert!(unrelated < 0.2);
    }

    #[test]
    fn test_normalize_whitespace() {
        assert_eq!(
            RosettaStone::normalize_whitespace("  hello   world  "),
            "hello world"
        );
        assert_eq!(
            RosettaStone::normalize_whitespace("x ( a , b )"),
            "x (a, b)"
        );
    }

    #[test]
    fn test_anti_drift_guarantee() {
        let expectations = [
            ("∀", "for all"),
            ("∃", "exists"),
            ("⇒", "implies"),
            ("∈", "in"),
            ("≜", "defined as"),
            ("∧", "and"),
            ("∨", "or"),
        ];

        for &(symbol, expected_prose) in expectations.iter() {
            let prose = RosettaStone::to_prose(symbol);
            assert!(
                prose.to_lowercase().contains(expected_prose),
                "Symbol {} should map to '{}', got '{}'",
                symbol,
                expected_prose,
                prose
            );
        }
    }
}