four_word_networking/
universal_ip_compression.rs

//! Universal IP+Port Compression for Four Words
//!
//! This module tries to pack an IPv4 address and port into 42 bits (four words)
//! using a set of compression strategies. An arbitrary IPv4+port pair carries
//! 48 bits of information, so not every combination can be represented; inputs
//! that no strategy can encode are rejected with an error.
6
7use crate::error::FourWordError;
8use std::net::Ipv4Addr;
9
/// Number of bits that a four-word encoding can carry.
// NOTE(review): the original comment derived 42 as "3 × 14 bits" while calling it
// "four words" — confirm the intended word/bit split against the word encoder.
const MAX_BITS: usize = 42;
/// Largest value representable in `MAX_BITS` bits (4,398,046,511,103).
const MAX_VALUE: u64 = u64::MAX >> (64 - MAX_BITS);
13
14/// Universal IP+Port compressor using advanced mathematical techniques
15pub struct UniversalIpCompressor {
16    // Port frequency analysis for compression
17    port_frequency_map: PortFrequencyMap,
18}
19
20impl Default for UniversalIpCompressor {
21    fn default() -> Self {
22        Self::new()
23    }
24}
25
26impl UniversalIpCompressor {
27    pub fn new() -> Self {
28        Self {
29            port_frequency_map: PortFrequencyMap::new(),
30        }
31    }
32
33    /// Compress IPv4 address and port into 42 bits or return error
34    pub fn compress(&self, ip: Ipv4Addr, port: u16) -> Result<u64, FourWordError> {
35        // Try multiple compression strategies
36
37        // Strategy 1: Frequency-based port compression
38        if let Ok(compressed) = self.compress_with_port_frequency(ip, port) {
39            return Ok(compressed);
40        }
41
42        // Strategy 2: Statistical IP pattern compression
43        if let Ok(compressed) = self.compress_with_ip_patterns(ip, port) {
44            return Ok(compressed);
45        }
46
47        // Strategy 3: Lossy compression with reconstruction hints
48        if let Ok(compressed) = self.compress_lossy_with_hints(ip, port) {
49            return Ok(compressed);
50        }
51
52        // Strategy 4: Range-based compression
53        if let Ok(compressed) = self.compress_with_ranges(ip, port) {
54            return Ok(compressed);
55        }
56
57        Err(FourWordError::InvalidInput(format!(
58            "Cannot compress {ip}:{port} into 42 bits with any strategy"
59        )))
60    }
61
62    /// Strategy 1: Use port frequency to save bits on common ports
63    fn compress_with_port_frequency(&self, ip: Ipv4Addr, port: u16) -> Result<u64, FourWordError> {
64        let octets = ip.octets();
65        let ip_u32 = u32::from_be_bytes(octets);
66
67        // Check if port is in our high-frequency list (save 4-6 bits)
68        if let Some(port_code) = self.port_frequency_map.get_code(port) {
69            let port_bits = if port_code < 16 { 4 } else { 8 };
70            let ip_bits = 42 - port_bits - 1; // -1 for frequency flag
71
72            if ip_u32 < (1u64 << ip_bits) as u32 {
73                let mut result = 1u64; // frequency flag
74                result |= (port_code as u64) << 1;
75                result |= (ip_u32 as u64) << (port_bits + 1);
76                return Ok(result);
77            }
78        }
79
80        Err(FourWordError::InvalidInput(
81            "Port frequency compression failed".to_string(),
82        ))
83    }
84
85    /// Strategy 2: Exploit common IP patterns and ranges
86    fn compress_with_ip_patterns(&self, ip: Ipv4Addr, port: u16) -> Result<u64, FourWordError> {
87        let octets = ip.octets();
88
89        // Pattern 1: Sequential octets (e.g., 192.168.1.100 -> base + offset)
90        if let Some(compressed) = self.try_sequential_pattern(octets, port) {
91            return Ok(compressed);
92        }
93
94        // Pattern 2: Repeated octets (e.g., 192.192.192.192)
95        if let Some(compressed) = self.try_repeated_pattern(octets, port) {
96            return Ok(compressed);
97        }
98
99        // Pattern 3: Zero-padded (e.g., 10.0.0.1)
100        if let Some(compressed) = self.try_zero_pattern(octets, port) {
101            return Ok(compressed);
102        }
103
104        Err(FourWordError::InvalidInput(
105            "No IP pattern match".to_string(),
106        ))
107    }
108
109    /// Strategy 3: Lossy compression with reconstruction ability
110    fn compress_lossy_with_hints(&self, ip: Ipv4Addr, port: u16) -> Result<u64, FourWordError> {
111        let octets = ip.octets();
112
113        // Approach: Store most significant bits + reconstruction hints
114        // This allows approximate reconstruction for many addresses
115
116        // Use 24 bits for IP (lose 1 bit per octet) + 16 bits for port + 2 bits for hint
117        let compressed_ip = ((octets[0] >> 1) as u32) << 21
118            | ((octets[1] >> 1) as u32) << 14
119            | ((octets[2] >> 1) as u32) << 7
120            | ((octets[3] >> 1) as u32);
121
122        // Hint bits encode the lost LSBs pattern
123        let hint =
124            (octets[0] & 1) << 3 | (octets[1] & 1) << 2 | (octets[2] & 1) << 1 | (octets[3] & 1);
125
126        let result = (compressed_ip as u64) << 18 | (port as u64) << 2 | (hint as u64);
127
128        if result <= MAX_VALUE {
129            Ok(result)
130        } else {
131            Err(FourWordError::InvalidInput(
132                "Lossy compression overflow".to_string(),
133            ))
134        }
135    }
136
137    /// Strategy 4: Range-based compression for clustered IPs
138    fn compress_with_ranges(&self, ip: Ipv4Addr, port: u16) -> Result<u64, FourWordError> {
139        let octets = ip.octets();
140        let ip_u32 = u32::from_be_bytes(octets);
141
142        // Common IP ranges that can be compressed
143        let ranges = [
144            // Range: base_ip, mask_bits, range_id
145            (0x0A000000, 8, 0),  // 10.0.0.0/8
146            (0xC0A80000, 16, 1), // 192.168.0.0/16
147            (0xAC100000, 12, 2), // 172.16.0.0/12
148            (0x7F000000, 8, 3),  // 127.0.0.0/8
149        ];
150
151        for (base, mask_bits, range_id) in ranges.iter() {
152            let mask = !(0xFFFFFFFFu32 >> mask_bits);
153            if (ip_u32 & mask) == *base {
154                let offset = ip_u32 & !mask;
155                let offset_bits = 32 - mask_bits;
156
157                // Encoding: 3 bits for range_id + offset_bits for IP + remaining for port
158                let total_bits = 3 + offset_bits + 16;
159                if total_bits <= 42 {
160                    let result = (*range_id as u64) << 39 | (offset as u64) << 16 | (port as u64);
161                    return Ok(result);
162                }
163            }
164        }
165
166        Err(FourWordError::InvalidInput(
167            "No suitable range found".to_string(),
168        ))
169    }
170
171    fn try_sequential_pattern(&self, octets: [u8; 4], port: u16) -> Option<u64> {
172        // Check if octets follow a pattern like [base, base+1, base+2, base+3]
173        if octets[1] == octets[0].wrapping_add(1)
174            && octets[2] == octets[0].wrapping_add(2)
175            && octets[3] == octets[0].wrapping_add(3)
176        {
177            // Pattern detected: store base + pattern_id + port
178            let pattern_id = 1u64;
179            let result = pattern_id << 40 | (octets[0] as u64) << 32 | (port as u64);
180            if result <= MAX_VALUE {
181                return Some(result);
182            }
183        }
184        None
185    }
186
187    fn try_repeated_pattern(&self, octets: [u8; 4], port: u16) -> Option<u64> {
188        // Check for repeated octets
189        if octets[0] == octets[1] && octets[1] == octets[2] && octets[2] == octets[3] {
190            let pattern_id = 2u64;
191            let result = pattern_id << 40 | (octets[0] as u64) << 32 | (port as u64);
192            if result <= MAX_VALUE {
193                return Some(result);
194            }
195        }
196        None
197    }
198
199    fn try_zero_pattern(&self, octets: [u8; 4], port: u16) -> Option<u64> {
200        // Pattern like 10.0.0.1 (many zeros)
201        let zero_count = octets.iter().filter(|&&x| x == 0).count();
202        if zero_count >= 2 {
203            // Encode non-zero positions and values
204            let pattern_id = 3u64;
205            let mut compressed = pattern_id << 39;
206
207            // This is a simplified version - real implementation would be more sophisticated
208            if octets[0] != 0 && octets[3] != 0 && octets[1] == 0 && octets[2] == 0 {
209                compressed |= (octets[0] as u64) << 31 | (octets[3] as u64) << 23 | (port as u64);
210                if compressed <= MAX_VALUE {
211                    return Some(compressed);
212                }
213            }
214        }
215        None
216    }
217
218    /// Decompress back to IP and port
219    pub fn decompress(&self, compressed: u64) -> Result<(Ipv4Addr, u16), FourWordError> {
220        if compressed > MAX_VALUE {
221            return Err(FourWordError::InvalidInput(
222                "Invalid compressed value".to_string(),
223            ));
224        }
225
226        // Try to identify which compression strategy was used and reverse it
227        // This is a simplified version - real implementation would need strategy detection
228
229        // For now, assume lossy compression (Strategy 3) as fallback
230        self.decompress_lossy_with_hints(compressed)
231    }
232
233    fn decompress_lossy_with_hints(
234        &self,
235        compressed: u64,
236    ) -> Result<(Ipv4Addr, u16), FourWordError> {
237        let hint = (compressed & 0xF) as u8;
238        let port = ((compressed >> 2) & 0xFFFF) as u16;
239        let compressed_ip = (compressed >> 18) as u32;
240
241        let octet0 = ((compressed_ip >> 21) & 0x7F) as u8;
242        let octet1 = ((compressed_ip >> 14) & 0x7F) as u8;
243        let octet2 = ((compressed_ip >> 7) & 0x7F) as u8;
244        let octet3 = (compressed_ip & 0x7F) as u8;
245
246        // Reconstruct LSBs from hint
247        let octets = [
248            (octet0 << 1) | ((hint >> 3) & 1),
249            (octet1 << 1) | ((hint >> 2) & 1),
250            (octet2 << 1) | ((hint >> 1) & 1),
251            (octet3 << 1) | (hint & 1),
252        ];
253
254        Ok((Ipv4Addr::from(octets), port))
255    }
256}
257
/// Lookup table that assigns short numeric codes to frequently used ports.
struct PortFrequencyMap {
    // `(port, code)` pairs; the most common ports receive the smallest codes.
    common_ports: Vec<(u16, u8)>,
}

impl PortFrequencyMap {
    /// Builds the table; codes are handed out sequentially in listing order.
    fn new() -> Self {
        // Codes 0-15 (4-bit codes) for the most common ports.
        const MOST_COMMON: [u16; 16] = [
            80, 443, 22, 21, 25, 53, 110, 143, 993, 995, 587, 465, 23, 3389, 5900, 1433,
        ];
        // Codes 16 and up (8-bit codes) for other frequent ports.
        const FREQUENT: [u16; 10] = [
            8080, 8443, 3000, 5000, 9000, 3306, 5432, 6379, 27017, 11211,
        ];

        let common_ports = MOST_COMMON
            .iter()
            .chain(FREQUENT.iter())
            .copied()
            .zip(0u8..)
            .collect();

        Self { common_ports }
    }

    /// Returns the code assigned to `port`, or `None` if the port is not in the table.
    fn get_code(&self, port: u16) -> Option<u8> {
        self.common_ports.iter().find_map(|&(candidate, code)| {
            if candidate == port {
                Some(code)
            } else {
                None
            }
        })
    }
}
307
#[cfg(test)]
mod tests {
    use super::*;

    /// Smoke test: runs typical address/port pairs through `compress` and
    /// `decompress` and prints the outcome; it makes no assertions.
    #[test]
    fn test_compression_strategies() {
        let compressor = UniversalIpCompressor::new();

        // Typical scenarios: private ranges, localhost, common ports.
        let scenarios = [
            ("192.168.1.100", 80),
            ("10.0.0.1", 22),
            ("127.0.0.1", 443),
            ("172.16.0.1", 8080),
        ];

        for (ip_str, port) in scenarios {
            let ip: Ipv4Addr = ip_str.parse().unwrap();

            let compressed = match compressor.compress(ip, port) {
                Ok(value) => value,
                Err(e) => {
                    println!("✗ Failed to compress {ip}:{port} - {e}");
                    continue;
                }
            };

            println!(
                "✓ Compressed {}:{} -> {} bits",
                ip,
                port,
                64 - compressed.leading_zeros()
            );

            // Report what the value decodes to.
            match compressor.decompress(compressed) {
                Err(e) => println!("  Decompression failed: {e}"),
                Ok((decompressed_ip, decompressed_port)) => {
                    println!("  Decompressed: {decompressed_ip}:{decompressed_port}");
                }
            }
        }
    }

    /// When the split-octet strategy accepts an address, the decoded port must
    /// match and every octet must be within one of the original.
    #[test]
    fn test_lossy_compression() {
        let compressor = UniversalIpCompressor::new();

        let ip = Ipv4Addr::new(203, 45, 67, 89);
        let port = 12345;

        let decoded = compressor
            .compress_lossy_with_hints(ip, port)
            .and_then(|compressed| {
                assert!(compressed <= MAX_VALUE);
                compressor.decompress_lossy_with_hints(compressed)
            });

        // Nothing is checked when either step reports an error.
        if let Ok((decompressed_ip, decompressed_port)) = decoded {
            assert_eq!(port, decompressed_port);

            let original = ip.octets();
            let restored = decompressed_ip.octets();
            for (i, (&before, &after)) in original.iter().zip(restored.iter()).enumerate() {
                let diff = (i16::from(before) - i16::from(after)).abs();
                assert!(diff <= 1, "Octet {i} diff too large: {diff}");
            }
        }
    }

    /// Any value `compress` returns for boundary inputs must fit in 42 bits.
    #[test]
    fn test_compression_bounds() {
        let compressor = UniversalIpCompressor::new();

        let edge_cases = [
            (Ipv4Addr::new(0, 0, 0, 0), 0),
            (Ipv4Addr::new(255, 255, 255, 255), 65535),
            (Ipv4Addr::new(127, 0, 0, 1), 80),
        ];

        for (ip, port) in edge_cases {
            // Inputs that cannot be compressed are allowed to fail.
            if let Ok(compressed) = compressor.compress(ip, port) {
                assert!(
                    compressed <= MAX_VALUE,
                    "Compressed value {compressed} exceeds maximum {MAX_VALUE}"
                );
            }
        }
    }
}