Skip to main content

rosetta_aisp/
rosetta.rs

1//! Rosetta Stone - Bidirectional prose ↔ AISP symbol mappings
2//!
3//! Based on AISP 5.1 Σ_512 glossary specification.
4//! Ported from aisp-converter npm package.
5
6use lazy_static::lazy_static;
7use regex::Regex;
8use std::collections::{HashMap, HashSet};
9
/// A single Rosetta Stone mapping: one AISP symbol plus the prose phrases that translate to it.
///
/// Every field borrows `'static` data, so entries are cheap to copy, compare and hash.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct RosettaEntry {
    /// AISP symbol emitted for any of `patterns` (for example `"∀"`).
    pub symbol: &'static str,
    /// Prose phrases mapped to `symbol`. The first phrase is the one used when
    /// a symbol is rendered back into prose.
    pub patterns: &'static [&'static str],
    /// Category label grouping related symbols (for example `"quantifier"`).
    pub category: &'static str,
}
17
18/// Complete Rosetta Stone mappings (AISP 5.1 Σ_512)
19/// Ported from aisp-converter npm package
20pub static ROSETTA: &[RosettaEntry] = &[
21    // ═══════════════════════════════════════════════════════════════
22    // QUANTIFIERS (∀:Quantifiers[128-191])
23    // ═══════════════════════════════════════════════════════════════
24    RosettaEntry {
25        symbol: "∀",
26        patterns: &["for all", "for every", "every", "all", "each", "any"],
27        category: "quantifier",
28    },
29    RosettaEntry {
30        symbol: "∃",
31        patterns: &["there exists", "exists", "some", "at least one", "there is"],
32        category: "quantifier",
33    },
34    RosettaEntry {
35        symbol: "∃!",
36        patterns: &[
37            "exists unique",
38            "exactly one",
39            "unique",
40            "one and only one",
41            "exists exactly one",
42        ],
43        category: "quantifier",
44    },
45    RosettaEntry {
46        symbol: "∄",
47        patterns: &["does not exist", "no such", "none exists"],
48        category: "quantifier",
49    },
50    // ═══════════════════════════════════════════════════════════════
51    // LOGIC (Ω:Transmuters[0-63])
52    // ═══════════════════════════════════════════════════════════════
53    RosettaEntry {
54        symbol: "∧",
55        patterns: &["and", "both", "as well as", "together with", "also"],
56        category: "logic",
57    },
58    RosettaEntry {
59        symbol: "∨",
60        patterns: &["or", "either", "alternatively", "otherwise"],
61        category: "logic",
62    },
63    RosettaEntry {
64        symbol: "¬",
65        patterns: &["not", "negation", "isn't", "is not", "doesn't", "does not"],
66        category: "logic",
67    },
68    RosettaEntry {
69        symbol: "⇒",
70        patterns: &[
71            "implies",
72            "if then",
73            "therefore",
74            "then",
75            "consequently",
76            "so",
77            "hence",
78        ],
79        category: "logic",
80    },
81    RosettaEntry {
82        symbol: "⇔",
83        patterns: &[
84            "if and only if",
85            "iff",
86            "equivalent to",
87            "is equivalent",
88            "exactly when",
89        ],
90        category: "logic",
91    },
92    RosettaEntry {
93        symbol: "→",
94        patterns: &["to", "returns", "maps to", "yields", "produces", "goes to"],
95        category: "logic",
96    },
97    RosettaEntry {
98        symbol: "↔",
99        patterns: &["bidirectional", "two-way", "both ways"],
100        category: "logic",
101    },
102    RosettaEntry {
103        symbol: "⊕",
104        patterns: &["xor", "exclusive or", "either but not both"],
105        category: "logic",
106    },
107    // ═══════════════════════════════════════════════════════════════
108    // COMPARISON
109    // ═══════════════════════════════════════════════════════════════
110    RosettaEntry {
111        symbol: ">",
112        patterns: &[
113            "greater than",
114            "more than",
115            "exceeds",
116            "above",
117            "larger than",
118        ],
119        category: "comparison",
120    },
121    RosettaEntry {
122        symbol: "<",
123        patterns: &["less than", "fewer than", "below", "smaller than", "under"],
124        category: "comparison",
125    },
126    RosettaEntry {
127        symbol: "≥",
128        patterns: &[
129            "greater than or equal",
130            "at least",
131            "no less than",
132            "minimum",
133            ">=",
134        ],
135        category: "comparison",
136    },
137    RosettaEntry {
138        symbol: "≤",
139        patterns: &[
140            "less than or equal",
141            "at most",
142            "no more than",
143            "maximum",
144            "<=",
145        ],
146        category: "comparison",
147    },
148    RosettaEntry {
149        symbol: "≡",
150        patterns: &[
151            "identical to",
152            "equals",
153            "is equal to",
154            "same as",
155            "equivalent",
156            "===",
157            "==",
158        ],
159        category: "comparison",
160    },
161    RosettaEntry {
162        symbol: "≢",
163        patterns: &[
164            "not identical",
165            "not equal",
166            "differs from",
167            "different from",
168            "!==",
169            "!=",
170        ],
171        category: "comparison",
172    },
173    RosettaEntry {
174        symbol: "≈",
175        patterns: &["approximately", "roughly", "about", "nearly"],
176        category: "comparison",
177    },
178    // ═══════════════════════════════════════════════════════════════
179    // DEFINITION (Ω:Transmuters)
180    // ═══════════════════════════════════════════════════════════════
181    RosettaEntry {
182        symbol: "≜",
183        patterns: &[
184            "defined as",
185            "is defined as",
186            "equals by definition",
187            "is a",
188            "means",
189            "definition",
190        ],
191        category: "definition",
192    },
193    RosettaEntry {
194        symbol: "≔",
195        patterns: &["assigned", "set to", "becomes", "gets", "is assigned", ":="],
196        category: "definition",
197    },
198    RosettaEntry {
199        symbol: "↦",
200        patterns: &["mapsto", "maps to", "sends to"],
201        category: "definition",
202    },
203    // ═══════════════════════════════════════════════════════════════
204    // FUNCTIONS (λ calculus)
205    // ═══════════════════════════════════════════════════════════════
206    RosettaEntry {
207        symbol: "λ",
208        patterns: &[
209            "lambda",
210            "function",
211            "anonymous function",
212            "fn",
213            "func",
214            "=>",
215        ],
216        category: "function",
217    },
218    RosettaEntry {
219        symbol: "∘",
220        patterns: &["compose", "composed with", "followed by"],
221        category: "function",
222    },
223    RosettaEntry {
224        symbol: "fix",
225        patterns: &["fixpoint", "recursive", "fixed point"],
226        category: "function",
227    },
228    RosettaEntry {
229        symbol: "μ",
230        patterns: &["least fixpoint", "lfp", "mu"],
231        category: "function",
232    },
233    // ═══════════════════════════════════════════════════════════════
234    // SETS (Γ:Topologics[64-127])
235    // ═══════════════════════════════════════════════════════════════
236    RosettaEntry {
237        symbol: "∈",
238        patterns: &["in", "element of", "member of", "belongs to", "is in"],
239        category: "set",
240    },
241    RosettaEntry {
242        symbol: "∉",
243        patterns: &["not in", "not element of", "not member of", "outside"],
244        category: "set",
245    },
246    RosettaEntry {
247        symbol: "⊆",
248        patterns: &["subset", "subset of", "contained in", "part of"],
249        category: "set",
250    },
251    RosettaEntry {
252        symbol: "⊇",
253        patterns: &["superset", "superset of", "contains"],
254        category: "set",
255    },
256    RosettaEntry {
257        symbol: "⊂",
258        patterns: &["proper subset", "strict subset"],
259        category: "set",
260    },
261    RosettaEntry {
262        symbol: "⊃",
263        patterns: &["proper superset", "strict superset"],
264        category: "set",
265    },
266    RosettaEntry {
267        symbol: "∪",
268        patterns: &["union", "combined with", "merged with"],
269        category: "set",
270    },
271    RosettaEntry {
272        symbol: "∩",
273        patterns: &["intersection", "overlapping with", "common to", "shared by"],
274        category: "set",
275    },
276    RosettaEntry {
277        symbol: "∅",
278        patterns: &["empty", "empty set", "null", "nothing", "nil", "void"],
279        category: "set",
280    },
281    RosettaEntry {
282        symbol: "𝒫",
283        patterns: &["powerset", "power set", "all subsets"],
284        category: "set",
285    },
286    RosettaEntry {
287        symbol: "∖",
288        patterns: &["set difference", "minus", "except", "without"],
289        category: "set",
290    },
291    RosettaEntry {
292        symbol: "𝔾",
293        patterns: &["graph", "network", "structure"],
294        category: "set",
295    },
296    // ═══════════════════════════════════════════════════════════════
297    // CONTRACTORS (Δ:Contractors[192-255])
298    // ═══════════════════════════════════════════════════════════════
299    RosettaEntry {
300        symbol: "Δ",
301        patterns: &["delta", "difference", "change", "increment"],
302        category: "contractor",
303    },
304    RosettaEntry {
305        symbol: "Pre",
306        patterns: &["precondition", "requires", "before"],
307        category: "contractor",
308    },
309    RosettaEntry {
310        symbol: "Post",
311        patterns: &["postcondition", "ensures", "after", "guarantees"],
312        category: "contractor",
313    },
314    RosettaEntry {
315        symbol: "Inv",
316        patterns: &["invariant", "always true", "maintained"],
317        category: "contractor",
318    },
319    // ═══════════════════════════════════════════════════════════════
320    // INTENTS (Ψ:Intents[320-383])
321    // ═══════════════════════════════════════════════════════════════
322    RosettaEntry {
323        symbol: "Ψ",
324        patterns: &["intent", "goal", "purpose", "objective"],
325        category: "intent",
326    },
327    RosettaEntry {
328        symbol: "μ",
329        patterns: &["fitness", "utility", "score", "metric"],
330        category: "intent",
331    },
332    RosettaEntry {
333        symbol: "Target",
334        patterns: &["target", "aim", "destination"],
335        category: "intent",
336    },
337    // ═══════════════════════════════════════════════════════════════
338    // TYPES (𝔻:Domaines[256-319])
339    // ═══════════════════════════════════════════════════════════════
340    RosettaEntry {
341        symbol: "ℕ",
342        patterns: &[
343            "natural",
344            "natural number",
345            "positive integer",
346            "nat",
347            "natural numbers",
348            "unsigned",
349        ],
350        category: "type",
351    },
352    RosettaEntry {
353        symbol: "ℤ",
354        patterns: &[
355            "integer",
356            "int",
357            "whole number",
358            "integers",
359            "signed integer",
360        ],
361        category: "type",
362    },
363    RosettaEntry {
364        symbol: "ℝ",
365        patterns: &[
366            "real",
367            "real number",
368            "float",
369            "decimal",
370            "double",
371            "number",
372        ],
373        category: "type",
374    },
375    RosettaEntry {
376        symbol: "ℚ",
377        patterns: &["rational", "rational number", "fraction"],
378        category: "type",
379    },
380    RosettaEntry {
381        symbol: "𝔹",
382        patterns: &["boolean", "bool", "true or false", "binary", "flag"],
383        category: "type",
384    },
385    RosettaEntry {
386        symbol: "𝕊",
387        patterns: &["string", "str", "text", "char sequence", "varchar"],
388        category: "type",
389    },
390    RosettaEntry {
391        symbol: "ℂ",
392        patterns: &["complex", "complex number"],
393        category: "type",
394    },
395    RosettaEntry {
396        symbol: "List",
397        patterns: &["list", "array", "sequence", "vector"],
398        category: "type",
399    },
400    RosettaEntry {
401        symbol: "Maybe",
402        patterns: &["maybe", "optional", "nullable", "option"],
403        category: "type",
404    },
405    RosettaEntry {
406        symbol: "Either",
407        patterns: &["either", "result", "union type"],
408        category: "type",
409    },
410    // ═══════════════════════════════════════════════════════════════
411    // TRUTH VALUES
412    // ═══════════════════════════════════════════════════════════════
413    RosettaEntry {
414        symbol: "⊤",
415        patterns: &["true", "top", "yes", "valid", "correct", "success", "ok"],
416        category: "truth",
417    },
418    RosettaEntry {
419        symbol: "⊥",
420        patterns: &[
421            "false",
422            "bottom",
423            "no",
424            "invalid",
425            "incorrect",
426            "failure",
427            "crash",
428            "error",
429        ],
430        category: "truth",
431    },
432    // ═══════════════════════════════════════════════════════════════
433    // SPECIAL (proofs, assertions)
434    // ═══════════════════════════════════════════════════════════════
435    RosettaEntry {
436        symbol: "∎",
437        patterns: &["qed", "proven", "end of proof", "proved", "done"],
438        category: "special",
439    },
440    RosettaEntry {
441        symbol: "⊢",
442        patterns: &["proves", "entails", "derives", "turnstile", "yields"],
443        category: "special",
444    },
445    RosettaEntry {
446        symbol: "⊨",
447        patterns: &["models", "satisfies", "validates"],
448        category: "special",
449    },
450    RosettaEntry {
451        symbol: "□",
452        patterns: &["necessarily", "always", "box"],
453        category: "special",
454    },
455    RosettaEntry {
456        symbol: "◇",
457        patterns: &["possibly", "eventually", "diamond"],
458        category: "special",
459    },
460    // ═══════════════════════════════════════════════════════════════
461    // MATH OPERATORS
462    // ═══════════════════════════════════════════════════════════════
463    RosettaEntry {
464        symbol: "+",
465        patterns: &["plus", "added to", "sum of", "add"],
466        category: "math",
467    },
468    RosettaEntry {
469        symbol: "−",
470        patterns: &["minus", "subtract", "subtracted from"],
471        category: "math",
472    },
473    RosettaEntry {
474        symbol: "×",
475        patterns: &["times", "multiplied by", "product of", "multiply"],
476        category: "math",
477    },
478    RosettaEntry {
479        symbol: "÷",
480        patterns: &["divided by", "over", "ratio of", "divide"],
481        category: "math",
482    },
483    RosettaEntry {
484        symbol: "²",
485        patterns: &["squared", "square of", "to the power of 2"],
486        category: "math",
487    },
488    RosettaEntry {
489        symbol: "³",
490        patterns: &["cubed", "cube of", "to the power of 3"],
491        category: "math",
492    },
493    RosettaEntry {
494        symbol: "√",
495        patterns: &["square root", "sqrt", "root of"],
496        category: "math",
497    },
498    RosettaEntry {
499        symbol: "Σ",
500        patterns: &["sum", "summation", "sigma"],
501        category: "math",
502    },
503    RosettaEntry {
504        symbol: "Π",
505        patterns: &["product", "pi", "prod"],
506        category: "math",
507    },
508    RosettaEntry {
509        symbol: "∞",
510        patterns: &["infinity", "infinite", "unbounded"],
511        category: "math",
512    },
513    // ═══════════════════════════════════════════════════════════════
514    // BLOCK MARKERS (⟦⟧:Delimiters[384-447])
515    // ═══════════════════════════════════════════════════════════════
516    RosettaEntry {
517        symbol: "⟦Ω⟧",
518        patterns: &["meta block", "metadata", "foundation"],
519        category: "block",
520    },
521    RosettaEntry {
522        symbol: "⟦Σ⟧",
523        patterns: &["types block", "type definitions", "glossary"],
524        category: "block",
525    },
526    RosettaEntry {
527        symbol: "⟦Γ⟧",
528        patterns: &["rules block", "business rules", "constraints"],
529        category: "block",
530    },
531    RosettaEntry {
532        symbol: "⟦Λ⟧",
533        patterns: &["functions block", "function definitions", "lambdas"],
534        category: "block",
535    },
536    RosettaEntry {
537        symbol: "⟦Χ⟧",
538        patterns: &["errors block", "error handling", "exceptions"],
539        category: "block",
540    },
541    RosettaEntry {
542        symbol: "⟦Ε⟧",
543        patterns: &["evidence block", "proof", "validation"],
544        category: "block",
545    },
546    // ═══════════════════════════════════════════════════════════════
547    // TUPLES & RECORDS
548    // ═══════════════════════════════════════════════════════════════
549    RosettaEntry {
550        symbol: "⟨",
551        patterns: &["tuple start", "record start", "angle open"],
552        category: "special",
553    },
554    RosettaEntry {
555        symbol: "⟩",
556        patterns: &["tuple end", "record end", "angle close"],
557        category: "special",
558    },
559    // ═══════════════════════════════════════════════════════════════
560    // QUALITY TIERS
561    // ═══════════════════════════════════════════════════════════════
562    RosettaEntry {
563        symbol: "◊⁺⁺",
564        patterns: &["platinum", "platinum tier", "optimal"],
565        category: "tier",
566    },
567    RosettaEntry {
568        symbol: "◊⁺",
569        patterns: &["gold", "gold tier", "production ready"],
570        category: "tier",
571    },
572    RosettaEntry {
573        symbol: "◊",
574        patterns: &["silver", "silver tier", "good"],
575        category: "tier",
576    },
577    RosettaEntry {
578        symbol: "◊⁻",
579        patterns: &["bronze", "bronze tier", "acceptable"],
580        category: "tier",
581    },
582    RosettaEntry {
583        symbol: "⊘",
584        patterns: &["reject", "rejected", "invalid tier"],
585        category: "tier",
586    },
587];
588
589lazy_static! {
590    /// Rosetta entries sorted by longest pattern first (greedy matching)
591    pub static ref ROSETTA_SORTED: Vec<&'static RosettaEntry> = {
592        let mut entries: Vec<_> = ROSETTA.iter().collect();
593        entries.sort_by(|a, b| {
594            let max_a = a.patterns.iter().map(|p| p.len()).max().unwrap_or(0);
595            let max_b = b.patterns.iter().map(|p| p.len()).max().unwrap_or(0);
596            max_b.cmp(&max_a)
597        });
598        entries
599    };
600
601    /// Pattern to symbol lookup
602    pub static ref PATTERN_TO_SYMBOL: HashMap<String, &'static str> = {
603        let mut m = HashMap::new();
604        for entry in ROSETTA {
605            for pattern in entry.patterns {
606                m.insert(pattern.to_lowercase(), entry.symbol);
607            }
608        }
609        m
610    };
611
612    /// Symbol to primary pattern lookup
613    pub static ref SYMBOL_TO_PATTERN: HashMap<&'static str, &'static str> = {
614        let mut m = HashMap::new();
615        for entry in ROSETTA {
616            if let Some(first) = entry.patterns.first() {
617                m.insert(entry.symbol, *first);
618            }
619        }
620        m
621    };
622
623    /// Compiled Rosetta entries for efficient matching
624    pub static ref ROSETTA_COMPILED: Vec<CompiledRosettaEntry> = {
625        ROSETTA_SORTED.iter().map(|entry| {
626            let compiled_patterns = entry.patterns.iter().filter_map(|pattern| {
627                let regex_str = format!(r"(?i)\b{}\b", escape_regex(pattern));
628                Regex::new(&regex_str).ok()
629            }).collect();
630            
631            CompiledRosettaEntry {
632                symbol: entry.symbol,
633                regexes: compiled_patterns,
634            }
635        }).collect()
636    };
637}
638
639/// Pre-compiled Rosetta entry
640pub struct CompiledRosettaEntry {
641    pub symbol: &'static str,
642    pub regexes: Vec<Regex>,
643}
644
645/// Find symbol for a prose pattern
646pub fn prose_to_symbol(pattern: &str) -> Option<&'static str> {
647    PATTERN_TO_SYMBOL
648        .get(&pattern.to_lowercase().trim().to_string())
649        .copied()
650}
651
652/// Find primary prose pattern for a symbol
653pub fn symbol_to_prose(symbol: &str) -> Option<&'static str> {
654    SYMBOL_TO_PATTERN.get(symbol).copied()
655}
656
657/// Get all symbols in a category
658pub fn symbols_by_category(category: &str) -> Vec<&'static str> {
659    ROSETTA
660        .iter()
661        .filter(|e| e.category == category)
662        .map(|e| e.symbol)
663        .collect()
664}
665
666/// Get all categories
667pub fn get_all_categories() -> Vec<&'static str> {
668    let mut categories: Vec<_> = ROSETTA.iter().map(|e| e.category).collect();
669    categories.sort();
670    categories.dedup();
671    categories
672}
673
674/// Count total mappings
675pub fn get_mapping_count() -> usize {
676    ROSETTA.iter().map(|e| e.patterns.len()).sum()
677}
678
/// Backslash-escapes the regex metacharacters `\ . * + ? ^ $ { } ( ) | [ ]`
/// in `s`; every other character is copied through unchanged.
fn escape_regex(s: &str) -> String {
    s.chars()
        .fold(String::with_capacity(s.len() * 2), |mut escaped, ch| {
            let is_meta = matches!(
                ch,
                '\\' | '.' | '*' | '+' | '?' | '^' | '$' | '{' | '}' | '(' | ')' | '|' | '[' | ']'
            );
            if is_meta {
                escaped.push('\\');
            }
            escaped.push(ch);
            escaped
        })
}
693
694/// Rosetta Stone converter
695pub struct RosettaStone;
696
697impl RosettaStone {
698    /// Convert prose to AISP symbols using deterministic mappings
699    /// Returns (converted_text, mapped_chars, unmapped_words)
700    pub fn convert(input: &str) -> (String, usize, Vec<String>) {
701        let mut result = input.to_string();
702        let mut mapped_chars = 0;
703        let _total_chars = input.len();
704
705        // Apply Rosetta mappings (longest patterns first) using pre-compiled regexes
706        for entry in ROSETTA_COMPILED.iter() {
707            for regex in entry.regexes.iter() {
708                let matches: Vec<_> = regex.find_iter(&result).collect();
709                mapped_chars += matches.iter().map(|m| m.as_str().len()).sum::<usize>();
710                result = regex.replace_all(&result, entry.symbol).to_string();
711            }
712        }
713
714        // Clean up operators (remove extra spaces)
715        result = Self::cleanup_operators(&result);
716
717        // Convert assignment patterns
718        result = Self::convert_assignments(&result);
719
720        // Find unmapped words
721        let unmapped = Self::find_unmapped_words(&result);
722
723        (result.trim().to_string(), mapped_chars, unmapped)
724    }
725
726    /// Calculate conversion confidence
727    pub fn confidence(input_len: usize, mapped_chars: usize) -> f64 {
728        if input_len == 0 {
729            return 1.0;
730        }
731        (mapped_chars as f64 / input_len as f64).min(1.0)
732    }
733
734    /// Clean up operators by removing extra spaces
735    fn cleanup_operators(input: &str) -> String {
736        let operators = ["≜", "≔", "⇒", "∈", "→", "⇔", "∧", "∨"];
737        let mut result = input.to_string();
738
739        for op in operators {
740            let regex_str = format!(r"\s*{}\s*", escape_regex(op));
741            if let Ok(regex) = Regex::new(&regex_str) {
742                result = regex.replace_all(&result, op).to_string();
743            }
744        }
745
746        result
747    }
748
749    /// Convert common assignment patterns
750    fn convert_assignments(input: &str) -> String {
751        let mut result = input.to_string();
752
753        // Convert "const x = 5" to "x≜5"
754        if let Ok(regex) = Regex::new(r"(?i)const\s+(\w+)\s*=\s*(\S+)") {
755            result = regex.replace_all(&result, "$1≜$2").to_string();
756        }
757
758        // Convert "Define x as y" to "x≜y"
759        if let Ok(regex) = Regex::new(r"(?i)Define\s+(\w+)\s+as\s+(\S+)") {
760            result = regex.replace_all(&result, "$1≜$2").to_string();
761        }
762
763        // Convert "let x = y" to "x≜y"
764        if let Ok(regex) = Regex::new(r"(?i)let\s+(\w+)\s*=\s*(\S+)") {
765            result = regex.replace_all(&result, "$1≜$2").to_string();
766        }
767
768        result
769    }
770
771    /// Find words that weren't mapped to symbols
772    fn find_unmapped_words(result: &str) -> Vec<String> {
773        let ignore_words = [
774            "the", "with", "that", "this", "from", "into", "when", "where", "which", "what",
775        ];
776
777        let word_regex = Regex::new(r"\b[a-zA-Z]{3,}\b").unwrap();
778        let words: Vec<_> = word_regex
779            .find_iter(result)
780            .map(|m| m.as_str().to_lowercase())
781            .collect();
782
783        let mut unique: Vec<_> = words
784            .into_iter()
785            .filter(|w| !ignore_words.contains(&w.as_str()))
786            .collect();
787
788        unique.sort();
789        unique.dedup();
790        unique
791    }
792
793    /// Convert AISP symbols back to prose
794    /// Maintains spacing for readability while preserving semantic meaning
795    pub fn to_prose(input: &str) -> String {
796        let mut result = input.to_string();
797
798        // Sort by symbol length (longest first) to avoid partial replacements
799        let mut entries: Vec<_> = ROSETTA.iter().collect();
800        entries.sort_by(|a, b| b.symbol.len().cmp(&a.symbol.len()));
801
802        for entry in entries {
803            if let Some(primary) = entry.patterns.first() {
804                // Add spaces around word replacements for readability
805                let replacement = format!(" {} ", primary);
806                result = result.replace(entry.symbol, &replacement);
807            }
808        }
809
810        // Ensure spaces between letters that got concatenated
811        // Handles cases like "adminimpliesallow" → "admin implies allow"
812        result = Self::add_word_boundaries(&result);
813
814        // Clean up multiple spaces and trim
815        Self::normalize_whitespace(&result)
816    }
817
818    /// Add spaces between concatenated words
819    fn add_word_boundaries(input: &str) -> String {
820        // Add space between lowercase followed by uppercase
821        let camel_case = Regex::new(r"([a-z])([A-Z])").unwrap();
822        let result = camel_case.replace_all(input, "$1 $2");
823
824        // Add space before words that follow certain patterns
825        let word_join = Regex::new(r"([a-zA-Z])( )(for all|exists|implies|and|or|not|if|then|else|in|defined as|identical to|true|false|lambda|function|returns|boolean|integer|string|natural|real|proves|therefore|yields)( )").unwrap();
826        let result = word_join.replace_all(&result, "$1 $3 ");
827
828        result.to_string()
829    }
830
831    /// Normalize whitespace in text
832    fn normalize_whitespace(input: &str) -> String {
833        let multiple_spaces = Regex::new(r"\s+").unwrap();
834        let result = multiple_spaces.replace_all(input, " ");
835
836        // Clean up spaces around punctuation
837        let space_before_punct = Regex::new(r"\s+([.,;:!?])").unwrap();
838        let result = space_before_punct.replace_all(&result, "$1");
839
840        // Clean up spaces after opening brackets
841        let space_after_open = Regex::new(r"([(\[{])\s+").unwrap();
842        let result = space_after_open.replace_all(&result, "$1");
843
844        // Clean up spaces before closing brackets
845        let space_before_close = Regex::new(r"\s+([)\]}])").unwrap();
846        let result = space_before_close.replace_all(&result, "$1");
847
848        result.trim().to_string()
849    }
850
851    /// Normalize text for semantic comparison (removes formatting differences)
852    pub fn normalize_for_comparison(input: &str) -> String {
853        let lowercase = input.to_lowercase();
854        let normalized = Self::normalize_whitespace(&lowercase);
855
856        // Remove punctuation for semantic comparison
857        let punct_regex = Regex::new(r#"[.,;:!?"']"#).unwrap();
858        punct_regex.replace_all(&normalized, "").trim().to_string()
859    }
860
861    /// Check semantic equivalence between two texts
862    /// Returns similarity score from 0.0 to 1.0
863    pub fn semantic_similarity(text1: &str, text2: &str) -> f64 {
864        let norm1 = Self::normalize_for_comparison(text1);
865        let norm2 = Self::normalize_for_comparison(text2);
866
867        // Extract words
868        let words1: HashSet<_> = norm1.split_whitespace().collect();
869        let words2: HashSet<_> = norm2.split_whitespace().collect();
870
871        if words1.is_empty() && words2.is_empty() {
872            return 1.0;
873        }
874
875        // Jaccard similarity
876        let intersection = words1.intersection(&words2).count();
877        let union = words1.union(&words2).count();
878
879        if union == 0 {
880            1.0
881        } else {
882            intersection as f64 / union as f64
883        }
884    }
885}
886
#[cfg(test)]
mod tests {
    use super::*;

    /// Known phrases resolve to their symbol; unknown phrases resolve to nothing.
    #[test]
    fn test_prose_to_symbol() {
        let cases = [
            ("for all", Some("∀")),
            ("exists", Some("∃")),
            ("unknown", None),
        ];
        for (phrase, expected) in cases {
            assert_eq!(prose_to_symbol(phrase), expected);
        }
    }

    /// Quantifier and membership phrases are both replaced by symbols.
    #[test]
    fn test_convert_basic() {
        let (aisp, _, _) = RosettaStone::convert("for all x in S");
        for symbol in ["∀", "∈"] {
            assert!(aisp.contains(symbol));
        }
    }

    /// "Define … as …" is rewritten to a definition.
    #[test]
    fn test_convert_assignment() {
        let (aisp, _, _) = RosettaStone::convert("Define x as 5");
        assert!(aisp.contains("≜"));
    }

    /// The table must stay large enough to cover the glossary.
    #[test]
    fn test_mapping_count() {
        let total = get_mapping_count();
        assert!(total > 300);
    }

    /// Symbols are expanded to their primary phrases.
    #[test]
    fn test_to_prose_basic() {
        let prose = RosettaStone::to_prose("∀x∈S");
        for phrase in ["for all", "in"] {
            assert!(prose.contains(phrase));
        }
    }

    /// Expanded phrases are separated from neighbouring text for readability.
    #[test]
    fn test_to_prose_spacing() {
        let prose = RosettaStone::to_prose("x≜5∧y≜10");
        for phrase in ["defined as", "and"] {
            assert!(prose.contains(phrase));
        }
    }

    /// prose → AISP → prose keeps most of the wording of a simple sentence.
    #[test]
    fn test_round_trip_simple() {
        let original = "for all x in S";
        let (aisp, _, _) = RosettaStone::convert(original);
        let round_tripped = RosettaStone::to_prose(&aisp);

        let similarity = RosettaStone::semantic_similarity(original, &round_tripped);
        assert!(
            similarity > 0.5,
            "Round trip lost too much meaning: {:.2}",
            similarity
        );
    }

    /// prose → AISP → prose keeps a reasonable share of a longer sentence.
    #[test]
    fn test_round_trip_complex() {
        let original = "Define x as 5 and for all y in S, if x equals y then return true";
        let (aisp, _, _) = RosettaStone::convert(original);
        let round_tripped = RosettaStone::to_prose(&aisp);

        let similarity = RosettaStone::semantic_similarity(original, &round_tripped);
        assert!(
            similarity > 0.4,
            "Complex round trip lost meaning: {:.2}",
            similarity
        );
    }

    /// Identical, similar and unrelated texts land in the expected score ranges.
    #[test]
    fn test_semantic_similarity() {
        // Identical texts
        let identical = RosettaStone::semantic_similarity("hello world", "hello world");
        assert_eq!(identical, 1.0);

        // Similar texts
        let similar = RosettaStone::semantic_similarity("for all x in set S", "for all x in S");
        assert!(similar > 0.7);

        // Different texts
        let different = RosettaStone::semantic_similarity("apple banana cherry", "dog cat bird");
        assert!(different < 0.2);
    }

    /// Whitespace runs collapse and spacing around brackets/punctuation is tidied.
    #[test]
    fn test_normalize_whitespace() {
        let cases = [
            ("  hello   world  ", "hello world"),
            ("x ( a , b )", "x (a, b)"),
        ];
        for (raw, expected) in cases {
            assert_eq!(RosettaStone::normalize_whitespace(raw), expected);
        }
    }

    /// AISP anti-drift rule: Mean(s) ≡ Mean_0(s).
    /// Core symbols must keep expanding to the same prose.
    #[test]
    fn test_anti_drift_guarantee() {
        let expectations = [
            ("∀", "for all"),
            ("∃", "exists"),
            ("⇒", "implies"),
            ("∈", "in"),
            ("≜", "defined as"),
            ("∧", "and"),
            ("∨", "or"),
        ];

        for (symbol, expected_prose) in expectations {
            let prose = RosettaStone::to_prose(symbol);
            assert!(
                prose.to_lowercase().contains(expected_prose),
                "Symbol {} should map to '{}', got '{}'",
                symbol,
                expected_prose,
                prose
            );
        }
    }
}
1011}