p2p_foundation/bootstrap/
words.rs

1//! Three-Word Address System
2//!
3//! Converts complex multiaddrs into memorable three-word combinations for human-friendly
4//! peer discovery and sharing. Inspired by what3words but designed specifically for
5//! P2P network bootstrap addresses.
6//!
7//! Example: `/ip6/2001:db8::1/udp/9000/quic` ↔ `ocean.thunder.falcon`
8
9use crate::{Multiaddr, P2PError, Result};
10use serde::{Deserialize, Serialize};
11use std::collections::HashMap;
12use std::str::FromStr;
13
/// Design capacity of each word position: 2^11 entries, so a single word
/// index packs into exactly 11 bits.
const WORDS_PER_POSITION: usize = 1 << 11;

/// Size of the full three-word address space at design capacity:
/// 2048^3 = 2^33, roughly 8.6 billion distinct addresses.
const TOTAL_COMBINATIONS: u64 = {
    let per_position = WORDS_PER_POSITION as u64;
    per_position * per_position * per_position
};
19
20/// Three-word address representation
21#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
22pub struct ThreeWordAddress {
23    pub first: String,
24    pub second: String, 
25    pub third: String,
26}
27
28impl ThreeWordAddress {
29    /// Create a new three-word address
30    pub fn new(first: String, second: String, third: String) -> Self {
31        Self { first, second, third }
32    }
33    
34    /// Parse from dot-separated string format
35    pub fn from_string(input: &str) -> Result<Self> {
36        let parts: Vec<&str> = input.split('.').collect();
37        if parts.len() != 3 {
38            return Err(P2PError::Bootstrap(
39                format!("Three-word address must have exactly 3 words separated by dots, got: {}", input)
40            ));
41        }
42        
43        Ok(Self {
44            first: parts[0].to_lowercase(),
45            second: parts[1].to_lowercase(),
46            third: parts[2].to_lowercase(),
47        })
48    }
49    
50    /// Convert to dot-separated string format
51    pub fn to_string(&self) -> String {
52        format!("{}.{}.{}", self.first, self.second, self.third)
53    }
54    
55    /// Validate that all words exist in the dictionary
56    pub fn validate(&self, encoder: &WordEncoder) -> Result<()> {
57        encoder.validate_words(&self.first, &self.second, &self.third)
58    }
59}
60
61impl std::fmt::Display for ThreeWordAddress {
62    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
63        write!(f, "{}", self.to_string())
64    }
65}
66
67impl FromStr for ThreeWordAddress {
68    type Err = P2PError;
69    
70    fn from_str(s: &str) -> Result<Self> {
71        Self::from_string(s)
72    }
73}
74
/// The three per-position word lists used for three-word addresses, together
/// with a word-to-index map for each list.
#[derive(Clone, Debug)]
pub struct WordDictionary {
    /// Position 1 ("context") words: geographic, network type.
    context_words: Vec<String>,
    /// Position 2 ("quality") words: performance, purpose, status.
    quality_words: Vec<String>,
    /// Position 3 ("identity") words: nature, objects, abstract concepts.
    identity_words: Vec<String>,

    /// Reverse lookup: context word -> its index in `context_words`.
    context_map: HashMap<String, usize>,
    /// Reverse lookup: quality word -> its index in `quality_words`.
    quality_map: HashMap<String, usize>,
    /// Reverse lookup: identity word -> its index in `identity_words`.
    identity_map: HashMap<String, usize>,
}
90
91impl WordDictionary {
92    /// Create a new word dictionary with default English words
93    pub fn new() -> Self {
94        let context_words = Self::default_context_words();
95        let quality_words = Self::default_quality_words();
96        let identity_words = Self::default_identity_words();
97        
98        let context_map: HashMap<String, usize> = context_words
99            .iter()
100            .enumerate()
101            .map(|(i, word)| (word.clone(), i))
102            .collect();
103            
104        let quality_map: HashMap<String, usize> = quality_words
105            .iter()
106            .enumerate() 
107            .map(|(i, word)| (word.clone(), i))
108            .collect();
109            
110        let identity_map: HashMap<String, usize> = identity_words
111            .iter()
112            .enumerate()
113            .map(|(i, word)| (word.clone(), i))
114            .collect();
115        
116        Self {
117            context_words,
118            quality_words,
119            identity_words,
120            context_map,
121            quality_map,
122            identity_map,
123        }
124    }
125    
126    /// Get word by position and index
127    pub fn get_word(&self, position: usize, index: usize) -> Option<&String> {
128        match position {
129            0 => self.context_words.get(index),
130            1 => self.quality_words.get(index), 
131            2 => self.identity_words.get(index),
132            _ => None,
133        }
134    }
135    
136    /// Get index by position and word
137    pub fn get_index(&self, position: usize, word: &str) -> Option<usize> {
138        let word_lower = word.to_lowercase();
139        match position {
140            0 => self.context_map.get(&word_lower).copied(),
141            1 => self.quality_map.get(&word_lower).copied(),
142            2 => self.identity_map.get(&word_lower).copied(),
143            _ => None,
144        }
145    }
146    
147    /// Validate that a word exists in the specified position
148    pub fn validate_word(&self, position: usize, word: &str) -> bool {
149        self.get_index(position, word).is_some()
150    }
151    
152    /// Get all words for a specific position
153    pub fn get_words_for_position(&self, position: usize) -> Option<&Vec<String>> {
154        match position {
155            0 => Some(&self.context_words),
156            1 => Some(&self.quality_words),
157            2 => Some(&self.identity_words),
158            _ => None,
159        }
160    }
161    
162    /// Default context words (position 1) - geographic and network context
163    fn default_context_words() -> Vec<String> {
164        vec![
165            // Geographic contexts
166            "global", "europe", "america", "asia", "africa", "oceania", "arctic", "pacific",
167            "atlantic", "indian", "mountain", "desert", "forest", "urban", "rural", "coastal",
168            "island", "valley", "plateau", "tundra", "savanna", "jungle", "prairie", "canyon",
169            
170            // Network contexts  
171            "local", "mesh", "bridge", "gateway", "relay", "hub", "node", "cluster", "edge",
172            "core", "access", "backbone", "fiber", "wireless", "mobile", "fixed", "satellite",
173            "ground", "space", "cloud", "fog", "mist", "clear", "direct", "routed", "switched",
174            
175            // Scale contexts
176            "micro", "mini", "small", "medium", "large", "huge", "giant", "massive", "tiny",
177            "compact", "wide", "narrow", "deep", "shallow", "high", "low", "fast", "slow",
178            
179            // Additional contexts to reach 2048 words
180            "north", "south", "east", "west", "central", "remote", "near", "far", "inner",
181            "outer", "upper", "lower", "front", "back", "left", "right", "home", "work",
182            "school", "public", "private", "open", "closed", "secure", "safe", "quick",
183            "steady", "smooth", "rough", "sharp", "soft", "hard", "light", "dark", "bright",
184            "dim", "warm", "cool", "hot", "cold", "fresh", "old", "new", "modern", "classic",
185        ][..std::cmp::min(WORDS_PER_POSITION, 100)].iter().map(|s| s.to_string()).collect()
186        // Note: This is a starter set - we'd expand to full 2048 words in production
187    }
188    
189    /// Default quality words (position 2) - performance, purpose, status
190    fn default_quality_words() -> Vec<String> {
191        vec![
192            // Performance qualities
193            "fast", "quick", "rapid", "swift", "speedy", "turbo", "hyper", "ultra", "super",
194            "stable", "solid", "steady", "reliable", "robust", "strong", "secure", "safe",
195            "premium", "elite", "pro", "advanced", "expert", "master", "prime", "top", "best",
196            "smooth", "fluid", "agile", "nimble", "efficient", "optimal", "perfect", "ideal",
197            
198            // Purpose qualities
199            "chat", "talk", "voice", "video", "stream", "share", "store", "backup", "sync",
200            "game", "play", "work", "study", "learn", "teach", "create", "build", "design",
201            "connect", "link", "bridge", "tunnel", "route", "switch", "filter", "block",
202            "allow", "grant", "deny", "check", "verify", "trust", "guard", "watch", "monitor",
203            
204            // Status qualities  
205            "active", "live", "online", "ready", "awake", "alert", "busy", "free", "open",
206            "public", "private", "hidden", "visible", "clear", "bright", "sharp", "focused",
207            "verified", "trusted", "known", "famous", "popular", "common", "rare", "unique",
208            "special", "magic", "power", "energy", "force", "strength", "grace", "beauty",
209            
210            // Additional qualities
211            "gentle", "calm", "peaceful", "quiet", "loud", "bold", "brave", "smart", "wise",
212            "clever", "bright", "brilliant", "clear", "pure", "clean", "fresh", "green",
213            "blue", "red", "gold", "silver", "bronze", "crystal", "diamond", "pearl", "ruby",
214        ][..std::cmp::min(WORDS_PER_POSITION, 100)].iter().map(|s| s.to_string()).collect()
215    }
216    
217    /// Default identity words (position 3) - nature, objects, abstract concepts
218    fn default_identity_words() -> Vec<String> {
219        vec![
220            // Nature - Animals
221            "eagle", "falcon", "hawk", "owl", "raven", "swan", "crane", "heron", "robin",
222            "lion", "tiger", "bear", "wolf", "fox", "deer", "elk", "moose", "bison",
223            "whale", "dolphin", "shark", "ray", "octopus", "seal", "penguin", "turtle",
224            "dragon", "phoenix", "griffin", "pegasus", "unicorn", "sphinx", "chimera",
225            
226            // Nature - Plants & Geography
227            "oak", "pine", "maple", "cedar", "willow", "bamboo", "lotus", "rose", "lily",
228            "mountain", "hill", "peak", "summit", "ridge", "valley", "canyon", "cliff",
229            "river", "stream", "lake", "pond", "ocean", "sea", "bay", "inlet", "shore",
230            "forest", "woods", "grove", "meadow", "field", "garden", "oasis", "desert",
231            
232            // Objects - Navigation & Tools
233            "compass", "anchor", "lighthouse", "beacon", "tower", "bridge", "gate", "door",
234            "key", "lock", "sword", "shield", "hammer", "anvil", "forge", "wheel", "gear",
235            "engine", "motor", "spring", "lever", "pulley", "rope", "chain", "cable", "wire",
236            "lens", "mirror", "prism", "crystal", "gem", "jewel", "crown", "ring", "star",
237            
238            // Abstract Concepts
239            "harmony", "balance", "rhythm", "melody", "symphony", "song", "dance", "flight",
240            "journey", "quest", "adventure", "discovery", "treasure", "mystery", "secret",
241            "dream", "vision", "hope", "faith", "trust", "love", "peace", "joy", "bliss",
242            "clarity", "wisdom", "knowledge", "truth", "light", "shadow", "spirit", "soul",
243            "essence", "core", "heart", "mind", "thought", "idea", "spark", "flame", "fire",
244        ][..std::cmp::min(WORDS_PER_POSITION, 100)].iter().map(|s| s.to_string()).collect()
245    }
246}
247
248impl Default for WordDictionary {
249    fn default() -> Self {
250        Self::new()
251    }
252}
253
254/// Main encoder/decoder for three-word addresses
255#[derive(Debug, Clone)]
256pub struct WordEncoder {
257    dictionary: WordDictionary,
258}
259
260impl WordEncoder {
261    /// Create a new word encoder with default dictionary
262    pub fn new() -> Self {
263        Self {
264            dictionary: WordDictionary::new(),
265        }
266    }
267    
268    /// Create encoder with custom dictionary
269    pub fn with_dictionary(dictionary: WordDictionary) -> Self {
270        Self { dictionary }
271    }
272    
273    /// Convert multiaddr to three-word address
274    pub fn encode_multiaddr(&self, multiaddr: &Multiaddr) -> Result<ThreeWordAddress> {
275        // Convert multiaddr to a consistent hash/fingerprint
276        let multiaddr_str = multiaddr.to_string();
277        let hash = self.hash_multiaddr(&multiaddr_str);
278        
279        // Extract three indices from the hash
280        let (context_idx, quality_idx, identity_idx) = self.extract_indices(hash);
281        
282        // Get words from dictionary
283        let first = self.dictionary.get_word(0, context_idx)
284            .ok_or_else(|| P2PError::Bootstrap("Context word index out of range".to_string()))?
285            .clone();
286            
287        let second = self.dictionary.get_word(1, quality_idx)
288            .ok_or_else(|| P2PError::Bootstrap("Quality word index out of range".to_string()))?
289            .clone();
290            
291        let third = self.dictionary.get_word(2, identity_idx)
292            .ok_or_else(|| P2PError::Bootstrap("Identity word index out of range".to_string()))?
293            .clone();
294        
295        Ok(ThreeWordAddress::new(first, second, third))
296    }
297    
298    /// Convert three-word address back to multiaddr
299    /// Note: This requires a registry/cache since the conversion isn't perfectly reversible
300    pub fn decode_to_multiaddr(&self, words: &ThreeWordAddress) -> Result<Multiaddr> {
301        // For now, return an error indicating this needs a registry lookup
302        // In a real implementation, this would query a distributed registry
303        Err(P2PError::Bootstrap(
304            "Multiaddr decoding requires registry lookup - not yet implemented".to_string()
305        ))
306    }
307    
308    /// Validate that all three words exist in the dictionary
309    pub fn validate_words(&self, first: &str, second: &str, third: &str) -> Result<()> {
310        if !self.dictionary.validate_word(0, first) {
311            return Err(P2PError::Bootstrap(format!("Unknown context word: {}", first)));
312        }
313        
314        if !self.dictionary.validate_word(1, second) {
315            return Err(P2PError::Bootstrap(format!("Unknown quality word: {}", second)));
316        }
317        
318        if !self.dictionary.validate_word(2, third) {
319            return Err(P2PError::Bootstrap(format!("Unknown identity word: {}", third)));
320        }
321        
322        Ok(())
323    }
324    
325    /// Get the word dictionary
326    pub fn dictionary(&self) -> &WordDictionary {
327        &self.dictionary
328    }
329    
330    /// Generate a consistent hash from multiaddr string
331    fn hash_multiaddr(&self, multiaddr: &str) -> u64 {
332        use std::collections::hash_map::DefaultHasher;
333        use std::hash::{Hash, Hasher};
334        
335        let mut hasher = DefaultHasher::new();
336        multiaddr.hash(&mut hasher);
337        hasher.finish()
338    }
339    
340    /// Extract three indices from hash for word lookup
341    fn extract_indices(&self, hash: u64) -> (usize, usize, usize) {
342        // Use different parts of the hash for each word position
343        // Ensure indices are within the actual dictionary size
344        let context_size = self.dictionary.context_words.len();
345        let quality_size = self.dictionary.quality_words.len();
346        let identity_size = self.dictionary.identity_words.len();
347        
348        let context_idx = (hash as usize) % context_size;
349        let quality_idx = ((hash >> 16) as usize) % quality_size;
350        let identity_idx = ((hash >> 32) as usize) % identity_size;
351        
352        (context_idx, quality_idx, identity_idx)
353    }
354}
355
356impl Default for WordEncoder {
357    fn default() -> Self {
358        Self::new()
359    }
360}
361
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds an address from string literals so the tests stay compact.
    fn address(first: &str, second: &str, third: &str) -> ThreeWordAddress {
        ThreeWordAddress::new(first.to_string(), second.to_string(), third.to_string())
    }

    /// Parsing splits on dots and formatting joins the words back together.
    #[test]
    fn test_three_word_address_parsing() {
        let parsed = ThreeWordAddress::from_string("ocean.thunder.falcon").unwrap();

        assert_eq!(parsed.first, "ocean");
        assert_eq!(parsed.second, "thunder");
        assert_eq!(parsed.third, "falcon");
        assert_eq!(parsed.to_string(), "ocean.thunder.falcon");
    }

    /// Dictionary words validate; words missing from the dictionary do not.
    #[test]
    fn test_three_word_address_validation() {
        let encoder = WordEncoder::new();

        // These words are present in the default dictionary at their positions.
        let known = address("global", "fast", "eagle");
        assert!(known.validate(&encoder).is_ok());

        // None of these words is in the default dictionary.
        let unknown = address("invalid", "words", "here");
        assert!(unknown.validate(&encoder).is_err());
    }

    /// Encoding yields three non-empty dictionary words and is repeatable.
    #[test]
    fn test_multiaddr_encoding() {
        let encoder = WordEncoder::new();
        let multiaddr: Multiaddr = "/ip6/2001:db8::1/udp/9000/quic".parse().unwrap();

        let words = encoder.encode_multiaddr(&multiaddr).unwrap();

        // Every position must carry a word.
        assert!(!words.first.is_empty());
        assert!(!words.second.is_empty());
        assert!(!words.third.is_empty());

        // The words must come from the encoder's own dictionary.
        assert!(words.validate(&encoder).is_ok());

        // Encoding the same multiaddr again must give the same words.
        let again = encoder.encode_multiaddr(&multiaddr).unwrap();
        assert_eq!(words, again);
    }

    /// The default dictionary is populated and answers lookups.
    #[test]
    fn test_word_dictionary() {
        let dict = WordDictionary::new();

        // All three positions have words.
        assert!(!dict.context_words.is_empty());
        assert!(!dict.quality_words.is_empty());
        assert!(!dict.identity_words.is_empty());

        // Known words are found at their positions.
        assert!(dict.validate_word(0, "global"));
        assert!(dict.validate_word(1, "fast"));
        assert!(dict.validate_word(2, "eagle"));

        // Unknown words are rejected.
        assert!(!dict.validate_word(0, "nonexistent"));
    }

    /// Repeated encodings of several multiaddrs always agree.
    #[test]
    fn test_deterministic_encoding() {
        const ADDRS: [&str; 3] = [
            "/ip6/2001:db8::1/udp/9000/quic",
            "/ip6/::1/tcp/8000",
            "/ip4/192.168.1.1/udp/5000/quic",
        ];

        let encoder = WordEncoder::new();

        for addr_str in ADDRS.iter() {
            let multiaddr: Multiaddr = addr_str.parse().unwrap();

            // Encode three times; neighbouring results must be equal.
            let words1 = encoder.encode_multiaddr(&multiaddr).unwrap();
            let words2 = encoder.encode_multiaddr(&multiaddr).unwrap();
            let words3 = encoder.encode_multiaddr(&multiaddr).unwrap();

            assert_eq!(words1, words2);
            assert_eq!(words2, words3);

            println!("{} -> {}", addr_str, words1);
        }
    }
}
451}