voided_core/obfuscation/
map.rs

1//! Map generation for character obfuscation.
2//!
3//! Generates deterministic obfuscation maps based on temperature and seed.
4
5use alloc::{
6    collections::BTreeMap,
7    string::{String, ToString},
8    vec::Vec,
9};
10use serde::{Deserialize, Serialize};
11
12use super::ObfuscationMap;
13
/// Temperature configuration for map generation.
///
/// Bundles the knobs that decide how many substitutions each character gets
/// and how long those substitutions may be. The generator in this module reads
/// `temperature`, the mapping-count bounds and the length bounds;
/// `expansion_ratio` and `compute_score` are descriptive values carried with
/// the profile and are not read by `generate_map` or `generate_word_pool`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TemperatureConfig {
    /// Temperature value (0.0 to 1.0)
    pub temperature: f64,
    /// Minimum number of mappings per character
    pub min_mappings: usize,
    /// Maximum number of mappings per character
    pub max_mappings: usize,
    /// Minimum length of individual mappings (the generator never goes below 2)
    pub min_length: usize,
    /// Maximum length of individual mappings
    pub max_length: usize,
    /// Target expansion ratio
    pub expansion_ratio: f64,
    /// Relative computational cost (0-100)
    pub compute_score: u32,
}
32
33impl Default for TemperatureConfig {
34    fn default() -> Self {
35        Self {
36            temperature: 0.5,
37            min_mappings: 2,
38            max_mappings: 5,
39            min_length: 2,
40            max_length: 8,
41            expansion_ratio: 2.1,
42            compute_score: 40,
43        }
44    }
45}
46
47/// Pre-defined temperature profiles
48pub fn get_temperature_profile(name: &str) -> Option<TemperatureConfig> {
49    match name {
50        "minimal" => Some(TemperatureConfig {
51            temperature: 0.0,
52            min_mappings: 1,
53            max_mappings: 2,
54            min_length: 2,  // Minimum 2 chars to avoid charset collisions
55            max_length: 3,
56            expansion_ratio: 1.5,
57            compute_score: 5,
58        }),
59        "low" => Some(TemperatureConfig {
60            temperature: 0.2,
61            min_mappings: 1,
62            max_mappings: 3,
63            min_length: 2,  // Minimum 2 chars to avoid charset collisions
64            max_length: 4,
65            expansion_ratio: 1.5,
66            compute_score: 15,
67        }),
68        "medium" => Some(TemperatureConfig {
69            temperature: 0.5,
70            min_mappings: 2,
71            max_mappings: 5,
72            min_length: 2,
73            max_length: 8,
74            expansion_ratio: 2.1,
75            compute_score: 40,
76        }),
77        "high" => Some(TemperatureConfig {
78            temperature: 0.8,
79            min_mappings: 3,
80            max_mappings: 8,
81            min_length: 3,
82            max_length: 12,
83            expansion_ratio: 3.7,
84            compute_score: 75,
85        }),
86        "extreme" => Some(TemperatureConfig {
87            temperature: 1.0,
88            min_mappings: 5,
89            max_mappings: 15,
90            min_length: 4,
91            max_length: 20,
92            expansion_ratio: 6.2,
93            compute_score: 100,
94        }),
95        _ => None,
96    }
97}
98
99/// Get temperature config from temperature value
100pub fn get_config_from_temperature(temperature: f64) -> TemperatureConfig {
101    let clamped = temperature.clamp(0.0, 1.0);
102    
103    // Interpolate between profiles
104    if clamped <= 0.1 {
105        get_temperature_profile("minimal").unwrap()
106    } else if clamped <= 0.3 {
107        get_temperature_profile("low").unwrap()
108    } else if clamped <= 0.6 {
109        get_temperature_profile("medium").unwrap()
110    } else if clamped <= 0.85 {
111        get_temperature_profile("high").unwrap()
112    } else {
113        get_temperature_profile("extreme").unwrap()
114    }
115}
116
/// Caller-supplied settings for `generate_map`.
#[derive(Debug, Clone)]
pub struct GenerateMapOptions {
    /// Temperature (0.0 to 1.0)
    pub temperature: f64,
    /// Seed for deterministic generation; `None` lets the generator derive one
    pub seed: Option<String>,
    /// Character set to create mappings for; `None` selects the built-in set
    pub charset: Option<String>,
}

impl Default for GenerateMapOptions {
    /// Medium temperature (0.5) with neither a seed nor a charset chosen.
    fn default() -> Self {
        GenerateMapOptions {
            seed: None,
            charset: None,
            temperature: 0.5,
        }
    }
}
137
/// Seeded pseudo-random number generator used for deterministic map generation.
///
/// The state is advanced by a linear congruential step
/// (`state * 9301 + 49297 mod 233280`), so every value returned by `next`
/// lies in `0..233280`.
struct SeededRandom {
    state: u64,
}

impl SeededRandom {
    /// Derives the initial state from the seed bytes: byte `i` is weighted by
    /// `31^i` (wrapping arithmetic) and the weighted bytes are summed.
    /// A sum of zero is replaced by 1.
    fn new(seed: &str) -> Self {
        let folded = seed
            .bytes()
            .enumerate()
            .fold(0u64, |acc, (position, byte)| {
                let weight = 31u64.wrapping_pow(position as u32);
                acc.wrapping_add(u64::from(byte).wrapping_mul(weight))
            });

        Self {
            state: folded.max(1),
        }
    }

    /// Advances the generator and returns the new state.
    fn next(&mut self) -> u64 {
        let advanced = self.state.wrapping_mul(9301).wrapping_add(49297);
        self.state = advanced % 233280;
        self.state
    }

    /// Returns a value in `0..max`. For `max == 0` the result is 0 and the
    /// generator is not advanced.
    fn next_usize(&mut self, max: usize) -> usize {
        match max {
            0 => 0,
            bound => (self.next() as usize) % bound,
        }
    }

    /// Returns a value in `min..=max`. When `min >= max` the result is `min`
    /// and the generator is not advanced.
    fn next_range(&mut self, min: usize, max: usize) -> usize {
        if min < max {
            min + self.next_usize(max - min + 1)
        } else {
            min
        }
    }
}
167
/// Word pools for different complexity levels
/// IMPORTANT: All mappings must be at least 2 characters to avoid collisions
/// with charset characters during deobfuscation. A single character mapping
/// like "a" would collide when "a" appears as an unmapped character in text.
///
/// `SIMPLE_WORDS` holds two-letter combinations starting with `a` or `b`.
/// Note that "an" and "at" also appear in `SHORT_WORDS`, so a pool built from
/// both lists contains those entries twice.
const SIMPLE_WORDS: &[&str] = &[
    "aa", "ab", "ac", "ad", "ae", "af", "ag", "ah", "ai", "aj", "ak", "al", "am",
    "an", "ao", "ap", "aq", "ar", "as", "at", "au", "av", "aw", "ax", "ay", "az",
    "ba", "bb", "bc", "bd", "bf", "bg", "bh", "bi", "bj", "bk", "bl", "bm",
    "bn", "bo", "bp", "bq", "br", "bs", "bt", "bu", "bv", "bw", "bx", "by", "bz",
];

/// Two-letter English words.
const SHORT_WORDS: &[&str] = &[
    "ox", "go", "hi", "me", "we", "up", "in", "on", "at", "to",
    "be", "do", "he", "it", "my", "no", "of", "so", "us", "an",
];

/// Common English words of three to five letters.
const MEDIUM_WORDS: &[&str] = &[
    "cat", "dog", "sun", "moon", "star", "tree", "bird", "fish",
    "book", "door", "hand", "eye", "car", "red", "blue", "gold",
    "fire", "water", "earth", "wind",
];

/// Five-letter English words.
const WORDS: &[&str] = &[
    "apple", "beach", "cloud", "dream", "eagle", "flame", "green",
    "heart", "ivory", "jewel", "knife", "lemon", "magic", "north",
    "ocean", "pearl", "quiet", "river", "storm", "tower",
];

/// Two-word phrases joined by an underscore.
const PHRASES: &[&str] = &[
    "bright_star", "deep_ocean", "wild_forest", "golden_sand",
    "silver_moon", "crystal_clear", "gentle_breeze", "warm_sunshine",
    "cool_shadow", "fresh_water", "ancient_tree", "peaceful_valley",
    "endless_sky", "hidden_treasure", "mystic_fog",
];

/// Long two-word technical phrases joined by an underscore; the longest pool entries.
const COMPLEX: &[&str] = &[
    "thunderstorm_approaching", "crystalline_formation",
    "electromagnetic_pulse", "quantum_entanglement",
    "bioluminescent_glow", "aerodynamic_structure",
    "photosynthetic_process", "metamorphic_transformation",
    "exponential_growth", "algorithmic_complexity",
];
210
/// Default character set for obfuscation, used by `generate_map` when the
/// caller supplies no charset: ASCII letters, digits, space, tab, newline and
/// common punctuation.
/// Note: Delimiter (U+001F) is NOT included to avoid conflicts
const DEFAULT_CHARSET: &str = 
    "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789 \t\n.,!?;:()[]{}\"'-=+*/%&@#$^~`|\\<>";
215
216/// Generate random strings for the word pool
217/// Ensures generated strings don't contain control characters or the delimiter
218fn generate_random_strings(
219    count: usize,
220    min_length: usize,
221    max_length: usize,
222    rng: &mut SeededRandom,
223) -> Vec<String> {
224    // Safe characters only: alphanumeric, underscore, hyphen
225    // Excludes control characters and the delimiter (U+001F)
226    const CHARS: &[u8] = b"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_-";
227    let mut result = Vec::with_capacity(count);
228
229    for _ in 0..count {
230        let length = rng.next_range(min_length, max_length);
231        let mut s = String::with_capacity(length);
232        for _ in 0..length {
233            let idx = rng.next_usize(CHARS.len());
234            s.push(CHARS[idx] as char);
235        }
236        result.push(s);
237    }
238
239    result
240}
241
242/// Generate word pool based on temperature config
243fn generate_word_pool(config: &TemperatureConfig, rng: &mut SeededRandom) -> Vec<String> {
244    let mut pools: Vec<String> = Vec::new();
245
246    // Select word pools based on length requirements
247    // Include pools that have words within our min-max length range
248    // SIMPLE_WORDS: 2-char words
249    // SHORT_WORDS: 2-char words  
250    // MEDIUM_WORDS: 3-5 char words
251    // WORDS: 5+ char words
252    // PHRASES: longer compound words
253    // COMPLEX: very long words
254    
255    // Always include SIMPLE and SHORT for min_length <= 2
256    if config.max_length >= 2 {
257        pools.extend(SIMPLE_WORDS.iter().map(|s| s.to_string()));
258        pools.extend(SHORT_WORDS.iter().map(|s| s.to_string()));
259    }
260    // Include MEDIUM for max_length >= 3
261    if config.max_length >= 3 {
262        pools.extend(MEDIUM_WORDS.iter().map(|s| s.to_string()));
263    }
264    // Include WORDS for max_length >= 5
265    if config.max_length >= 5 {
266        pools.extend(WORDS.iter().map(|s| s.to_string()));
267    }
268    // Include PHRASES for higher temperatures
269    if config.temperature >= 0.5 && config.max_length >= 10 {
270        pools.extend(PHRASES.iter().map(|s| s.to_string()));
271    }
272    // Include COMPLEX for very high temperatures
273    if config.temperature >= 0.8 && config.max_length >= 15 {
274        pools.extend(COMPLEX.iter().map(|s| s.to_string()));
275    }
276
277    // Add random strings based on temperature
278    let random_count = (config.temperature * 200.0) as usize;
279    if random_count > 0 {
280        pools.extend(generate_random_strings(
281            random_count,
282            config.min_length,
283            config.max_length,
284            rng,
285        ));
286    }
287
288    // Filter by length requirements and ensure no unsafe characters
289    // Remove any words containing control characters or the delimiter
290    // CRITICAL: All mappings must be at least 2 characters to avoid collisions
291    // with charset characters during deobfuscation
292    // Sort first to ensure deterministic filtering order
293    pools.sort();
294    let min_mapping_length = config.min_length.max(2); // Force minimum of 2
295    let mut filtered: Vec<String> = pools
296        .into_iter()
297        .filter(|word| {
298            word.len() >= min_mapping_length 
299                && word.len() <= config.max_length
300                && !word.chars().any(|c| {
301                    // Exclude control characters (0x00-0x1F) except tab and newline
302                    // But actually, we should exclude ALL control characters for JSON safety
303                    // Also exclude the delimiter
304                    c == '\x1F' || (c < ' ' && c != '\t' && c != '\n')
305                })
306        })
307        .collect();
308
309    // Word pool size is variable based on temperature
310    // Lower temperature = smaller pool (simpler mappings)
311    // Higher temperature = larger pool (more complex mappings)
312    // Minimum pool size to ensure we have enough unique mappings
313    let min_pool_size = 50;
314    let target_pool_size = min_pool_size + ((config.temperature * 150.0) as usize);
315    
316    // Generate additional random strings if needed to reach target size
317    while filtered.len() < target_pool_size {
318        let needed = target_pool_size - filtered.len();
319        let additional = generate_random_strings(
320            needed * 2, // Generate extra to account for potential filtering
321            min_mapping_length, // Use the safe minimum
322            config.max_length,
323            rng,
324        );
325        // Filter the additional strings and add them
326        for word in additional {
327            if word.len() >= min_mapping_length 
328                && word.len() <= config.max_length
329                && !word.chars().any(|c| c == '\x1F' || (c < ' ' && c != '\t' && c != '\n'))
330                && !filtered.contains(&word) {
331                filtered.push(word);
332                if filtered.len() >= target_pool_size {
333                    break;
334                }
335            }
336        }
337    }
338    
339    // Don't truncate - keep variable size based on temperature
340    // But ensure we have at least the minimum
341    if filtered.len() < min_pool_size {
342        // This shouldn't happen, but ensure minimum
343        let needed = min_pool_size - filtered.len();
344        let additional = generate_random_strings(
345            needed * 2,
346            min_mapping_length, // Use the safe minimum
347            config.max_length,
348            rng,
349        );
350        for word in additional {
351            if word.len() >= min_mapping_length 
352                && word.len() <= config.max_length
353                && !word.chars().any(|c| c == '\x1F' || (c < ' ' && c != '\t' && c != '\n'))
354                && !filtered.contains(&word) {
355                filtered.push(word);
356                if filtered.len() >= min_pool_size {
357                    break;
358                }
359            }
360        }
361    }
362
363    // Sort before shuffling to ensure deterministic starting point
364    filtered.sort();
365    
366    // Shuffle the pool deterministically
367    for i in (1..filtered.len()).rev() {
368        let j = rng.next_usize(i + 1);
369        filtered.swap(i, j);
370    }
371
372    filtered
373}
374
375/// Generate an obfuscation map with temperature-based complexity
376///
377/// # Arguments
378///
379/// * `options` - Generation options including temperature and seed
380///
381/// # Returns
382///
383/// An ObfuscationMap mapping each character to possible substitutions
384pub fn generate_map(options: Option<GenerateMapOptions>) -> ObfuscationMap {
385    let opts = options.unwrap_or_default();
386    let temperature = opts.temperature.clamp(0.0, 1.0);
387    
388    // Generate seed if not provided - use deterministic hash instead of thread_rng
389    // to avoid N-API deadlock issues
390    let seed = opts.seed.unwrap_or_else(|| {
391        // Create a deterministic seed from temperature
392        // In practice, the TypeScript wrapper should always provide a seed
393        // This is just a fallback that won't deadlock
394        let temp_bits = temperature.to_bits();
395        let mut hash: u64 = 0;
396        for i in 0..8 {
397            hash = hash.wrapping_mul(31).wrapping_add(((temp_bits >> (i * 8)) & 0xFF) as u64);
398        }
399        format!("{:016x}", hash)
400    });
401    
402    let charset = opts.charset.unwrap_or_else(|| DEFAULT_CHARSET.to_string());
403    
404    let config = get_config_from_temperature(temperature);
405    // Create a fresh RNG for word pool generation to ensure determinism
406    let mut pool_rng = SeededRandom::new(&format!("{}-pool", seed));
407    let word_pool = generate_word_pool(&config, &mut pool_rng);
408    
409    let mut map: ObfuscationMap = BTreeMap::new();
410    let mut used_mappings: std::collections::HashSet<String> = std::collections::HashSet::new();
411    
412    // Collect charset characters into a Vec and sort for deterministic iteration order
413    let mut charset_chars: Vec<char> = charset.chars().collect();
414    charset_chars.sort();
415    
416    // Pre-calculate how many mappings each character needs (deterministic)
417    // This ensures we know the total needed before we start assigning
418    let char_mapping_counts: Vec<(char, usize)> = charset_chars.iter().map(|&ch| {
419        // Use a char-specific RNG to determine mapping count deterministically
420        let mut count_rng = SeededRandom::new(&format!("{}-count-{}", seed, ch as u32));
421        let count = count_rng.next_range(config.min_mappings, config.max_mappings);
422        (ch, count)
423    }).collect();
424    
425    // Calculate total mappings needed
426    let total_mappings_needed: usize = char_mapping_counts.iter().map(|(_, count)| count).sum();
427    
428    // Ensure word pool is large enough (variable based on temperature)
429    // Generate more words if needed, but keep it deterministic
430    // CRITICAL: All mappings must be at least 2 characters
431    let min_extend_length = config.min_length.max(2);
432    let mut extended_word_pool = word_pool.clone();
433    if extended_word_pool.len() < total_mappings_needed {
434        let needed = total_mappings_needed - extended_word_pool.len();
435        let mut extend_rng = SeededRandom::new(&format!("{}-extend", seed));
436        let additional = generate_random_strings(
437            needed * 3, // Generate extra to account for uniqueness checks and filtering
438            min_extend_length,
439            config.max_length,
440            &mut extend_rng,
441        );
442        // Filter and collect additional words (must be at least 2 chars)
443        let mut filtered_additional: Vec<String> = additional.into_iter()
444            .filter(|word| {
445                word.len() >= 2 // Absolute minimum to avoid charset collisions
446                    && word.len() >= min_extend_length 
447                    && word.len() <= config.max_length
448                    && !word.chars().any(|c| c == '\x1F' || (c < ' ' && c != '\t' && c != '\n'))
449                    && !extended_word_pool.contains(word)
450            })
451            .collect();
452        
453        // Sort before adding to ensure deterministic order
454        filtered_additional.sort();
455        extended_word_pool.extend(filtered_additional.into_iter().take(total_mappings_needed * 2));
456    }
457    
458    // Sort word pool for deterministic selection order
459    extended_word_pool.sort();
460    
461    // Now assign mappings deterministically
462    let mut word_pool_index = 0;
463    for (char, num_mappings) in char_mapping_counts {
464        let mut mappings: Vec<String> = Vec::with_capacity(num_mappings);
465        
466        // Try to get mappings from the word pool first (deterministic order)
467        while mappings.len() < num_mappings && word_pool_index < extended_word_pool.len() {
468            let word = &extended_word_pool[word_pool_index];
469            word_pool_index += 1;
470            
471            // Ensure mapping is unique across ALL characters
472            if !used_mappings.contains(word) {
473                mappings.push(word.clone());
474                used_mappings.insert(word.clone());
475            }
476        }
477        
478        // Fallback: generate random strings if we don't have enough from pool
479        // Use a char-specific RNG for fallback to ensure determinism
480        // CRITICAL: Minimum length of 2 to avoid charset collisions
481        let min_fallback_length = config.min_length.max(2);
482        let mut fallback_rng = SeededRandom::new(&format!("{}-fallback-{}", seed, char as u32));
483        let mut fallback_attempts = 0;
484        while mappings.len() < num_mappings && fallback_attempts < num_mappings * 100 {
485            fallback_attempts += 1;
486            let random_words = generate_random_strings(1, min_fallback_length, config.max_length, &mut fallback_rng);
487            if let Some(word) = random_words.into_iter().next() {
488                // Double-check: ensure word is at least 2 chars, doesn't contain delimiter or control chars
489                if word.len() >= 2
490                    && !word.contains('\x1F') 
491                    && !word.chars().any(|c| c < ' ' && c != '\t' && c != '\n')
492                    && !used_mappings.contains(&word) {
493                    used_mappings.insert(word.clone());
494                    mappings.push(word);
495                }
496            }
497        }
498        
499        // Sort mappings for determinism (same seed should produce same map)
500        mappings.sort();
501        map.insert(char, mappings);
502    }
503    
504    map
505}
506
/// Statistics describing an obfuscation map, as produced by `analyze_map`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct MapAnalysis {
    /// Estimated temperature based on characteristics (clamped to 0.0..=1.0)
    pub temperature: f64,
    /// Total number of mappings across all characters
    pub total_mappings: usize,
    /// Average mappings per character
    pub average_mappings_per_char: f64,
    /// Average length of mappings, measured with `len()` (bytes)
    pub average_mapping_length: f64,
    /// Expansion ratio (`analyze_map` sets this to the average mapping length)
    pub expansion_ratio: f64,
    /// Compute score
    pub compute_score: u32,
    /// Entropy: mean per-character entropy in bits, treating each of a
    /// character's mappings as equally likely
    pub entropy: f64,
}
525
526/// Analyze an obfuscation map
527pub fn analyze_map(map: &ObfuscationMap) -> MapAnalysis {
528    let mut total_mappings = 0;
529    let mut total_mapping_length = 0;
530    let mut entropy = 0.0;
531
532    for mappings in map.values() {
533        total_mappings += mappings.len();
534        total_mapping_length += mappings.iter().map(|m| m.len()).sum::<usize>();
535
536        if !mappings.is_empty() {
537            let p = 1.0 / mappings.len() as f64;
538            entropy += mappings.len() as f64 * (-p * p.log2());
539        }
540    }
541
542    let char_count = map.len();
543    let average_mappings_per_char = if char_count > 0 {
544        total_mappings as f64 / char_count as f64
545    } else {
546        0.0
547    };
548    
549    let average_mapping_length = if total_mappings > 0 {
550        total_mapping_length as f64 / total_mappings as f64
551    } else {
552        0.0
553    };
554    
555    let expansion_ratio = average_mapping_length;
556
557    // Estimate temperature based on characteristics
558    let estimated_temperature = ((average_mappings_per_char - 1.0) / 10.0 + expansion_ratio / 10.0)
559        .clamp(0.0, 1.0);
560
561    let compute_score = (average_mappings_per_char.log2() * 10.0 + average_mapping_length * 2.0) as u32;
562
563    MapAnalysis {
564        temperature: estimated_temperature,
565        total_mappings,
566        average_mappings_per_char,
567        average_mapping_length,
568        expansion_ratio,
569        compute_score,
570        entropy: entropy / char_count as f64,
571    }
572}
573
574/// Get expansion ratio estimate for a map
575pub fn get_expansion_ratio(map: &ObfuscationMap) -> f64 {
576    let mut total_original = 0;
577    let mut total_mapping = 0.0;
578
579    for (char, mappings) in map.iter() {
580        total_original += char.len_utf8();
581        if !mappings.is_empty() {
582            let avg_len: f64 = mappings.iter().map(|m| m.len() as f64).sum::<f64>() 
583                / mappings.len() as f64;
584            total_mapping += avg_len;
585        }
586    }
587
588    if total_original > 0 {
589        total_mapping / total_original as f64
590    } else {
591        1.0
592    }
593}
594
#[cfg(test)]
mod tests {
    use super::*;

    /// Medium-temperature options with an explicit seed and charset.
    fn seeded_options(seed: &str, charset: &str) -> GenerateMapOptions {
        GenerateMapOptions {
            temperature: 0.5,
            seed: Some(seed.to_string()),
            charset: Some(charset.to_string()),
        }
    }

    #[test]
    fn test_generate_map_default() {
        let map = generate_map(None);

        // The default charset must produce entries, each with a mapping.
        assert!(!map.is_empty());
        assert!(map.values().all(|mappings| !mappings.is_empty()));
    }

    #[test]
    fn test_generate_map_deterministic() {
        let opts = seeded_options("test-seed-123", "abc");

        let first = generate_map(Some(opts.clone()));
        let second = generate_map(Some(opts));

        // Same seed should produce same map
        assert_eq!(first, second);
    }

    #[test]
    fn test_generate_map_unique_mappings() {
        let map = generate_map(Some(seeded_options("unique-test", "abcdef")));

        // No mapping may appear twice anywhere in the map.
        let mut seen: std::collections::HashSet<&String> = std::collections::HashSet::new();
        for mapping in map.values().flatten() {
            assert!(seen.insert(mapping), "Duplicate mapping found: {}", mapping);
        }
    }

    #[test]
    fn test_temperature_profiles() {
        for name in ["minimal", "low", "medium", "high", "extreme"] {
            assert!(get_temperature_profile(name).is_some());
        }
        assert!(get_temperature_profile("invalid").is_none());
    }

    #[test]
    fn test_analyze_map() {
        let map = generate_map(Some(seeded_options("analyze-test", "abc")));

        let analysis = analyze_map(&map);

        assert!(analysis.total_mappings > 0);
        assert!(analysis.average_mappings_per_char > 0.0);
        assert!(analysis.average_mapping_length > 0.0);
    }

    #[test]
    fn test_expansion_ratio() {
        let map = generate_map(Some(seeded_options("ratio-test", "ab")));

        // Ratio should be positive
        assert!(get_expansion_ratio(&map) > 0.0);
    }
}
688