cdns_rs/external/rust_punycode/
mod.rs

//! Functions to decode and encode [RFC-3492 Punycode](https://tools.ietf.org/html/rfc3492).
2
3use crate::{common, internal_error, CDnsErrorType, CDnsResult};
4
5// See [RFC-3492, section 4](https://tools.ietf.org/html/rfc3492#section-4).
6const BASE         : u32 = 36;
7const TMIN         : u32 = 1;
8const TMAX         : u32 = 26;
9const SKEW         : u32 = 38;
10const DAMP         : u32 = 700;
11const INITIAL_BIAS : u32 = 72;
12const INITIAL_N    : u32 = 128;
13const DELIMITER    : char = '-';
14
15/// Decode the string as Punycode. The string should not contain the initial `xn--` and must
16/// contain only ASCII characters.
17/// # Example
18/// ```
19/// assert_eq!(
20///     punycode::decode("acadmie-franaise-npb1a").unwrap(),
21///     "académie-française"
22/// );
23/// ```
24pub 
25fn decode(input: &str) -> CDnsResult<String> 
26{
27    if !input.is_ascii() 
28    {
29        internal_error!(CDnsErrorType::PunycodeNotAscii, "{} is not valid punycode", 
30            common::sanitize_str_unicode(input));
31    }
32
33    let input = 
34        if input.starts_with("xn-") == true
35        {
36            &input[3..]
37        }
38        else
39        {
40            input
41        };
42
43    let mut n = INITIAL_N;
44    let mut i = 0;
45    let mut bias = INITIAL_BIAS;
46
47    let (mut output, input) = 
48        if let Some(i) = input.rfind(DELIMITER) 
49        {
50            (input[0..i].chars().collect(), &input[i+1..])
51        }
52        else 
53        {
54            (vec![], &input[..])
55        };
56
57    let mut it = input.chars().peekable();
58
59    while it.peek() != None 
60    {
61        let oldi = i;
62        let mut w = 1;
63
64        for k in 1.. 
65        {
66            let c = 
67                if let Some(c) = it.next() 
68                {
69                    c
70                }
71                else 
72                {
73                    internal_error!(CDnsErrorType::DnsResponse, 
74                        "{} punycode decode codepoint unexpected EOF, k={}", common::sanitize_str_unicode(input),
75                        k);
76                };
77
78            let k = k*BASE;
79
80            let digit = decode_digit(c);
81
82            if digit == BASE 
83            {
84                internal_error!(CDnsErrorType::DnsResponse, 
85                        "{} punycode decode, decoded digit '{}', eq BASE '{}'", 
86                        common::sanitize_str_unicode(input), digit, BASE);
87            }
88
89            // overflow check
90            if digit > (std::u32::MAX - i) / w 
91            {
92                internal_error!(CDnsErrorType::DnsResponse, 
93                        "{} punycode decode, digit '{}' overflow {}", 
94                        common::sanitize_str_unicode(input), digit, digit > (std::u32::MAX - i) / w);
95            }
96
97            i += digit * w;
98
99            let t = clamped_sub(TMIN, k, bias, TMAX);
100            if digit < t 
101            {
102                break;
103            }
104
105            // overflow check
106            if BASE > (std::u32::MAX - t) / w 
107            {
108                internal_error!(CDnsErrorType::DnsResponse, 
109                        "{} punycode decode, BASE '{}' overflow {}", 
110                        common::sanitize_str_unicode(input), BASE, (std::u32::MAX - t) / w);
111            }
112
113            w *= BASE - t;
114        }
115
116        let len = (output.len() + 1) as u32;
117        bias = adapt(i - oldi, len, oldi == 0);
118
119        let il = i / len;
120
121        // overflow check
122        if n > std::u32::MAX - il 
123        {
124            internal_error!(CDnsErrorType::DnsResponse, 
125                "{} punycode decode, n '{}' overflow {}", 
126                common::sanitize_str_unicode(input), n, std::u32::MAX - il);
127        }
128        n += il;
129        i %= len;
130
131        if let Some(c) = std::char::from_u32(n) 
132        {
133            output.insert(i as usize, c);
134        }
135        else 
136        {
137            internal_error!(CDnsErrorType::DnsResponse, 
138                "{} punycode decode, cannot cast n={} to char", 
139                common::sanitize_str_unicode(input), n);
140        }
141
142        i += 1;
143    }
144
145    Ok(output.iter().cloned().collect())
146}
147
148/// Encode a string as punycode. The result string will contain only ASCII characters. The result
149/// string does not start with `xn--`.
150/// # Example
151/// ```
152/// assert_eq!(
153///     punycode::encode("académie-française").unwrap(),
154///     "acadmie-franaise-npb1a"
155/// );
156/// ```
157pub 
158fn encode<I: AsRef<str>>(ref_input: I, add_xn: bool) -> CDnsResult<String> 
159{
160    let input = ref_input.as_ref().chars().collect::<Vec<char>>();
161
162    let mut n = INITIAL_N;
163    let mut delta = 0;
164    let mut bias = INITIAL_BIAS;
165
166    let mut output = 
167        input
168            .iter()
169            .filter(|&&c| c.is_ascii())
170            .cloned()
171            .collect::<String>();
172
173    
174
175    let mut h = output.len() as u32;
176    let b = h;
177
178    if b > 0 || add_xn == true
179    {
180        output.push(DELIMITER)
181    }
182
183    while h < input.len() as u32 
184    {
185        let m = *input.iter().filter(|&&c| (c as u32) >= n).min().unwrap() as u32;
186
187        if m - n > (std::u32::MAX - delta) / (h + 1) 
188        {
189            internal_error!(CDnsErrorType::DnsResponse, 
190                "{} punycode encode, m - n: {} overflow {} ", 
191                common::sanitize_str_unicode(ref_input.as_ref()), m-n, (std::u32::MAX - delta) / (h + 1));
192        }
193
194        delta += (m - n) * (h + 1);
195
196        n = m;
197
198        for c in input.iter().map(|c| *c as u32)
199        {
200            //let c = c as u32;
201
202            if c < n 
203            {
204                delta += 1;
205            }
206            else if c == n 
207            {
208                let mut q = delta;
209
210                for k in 1.. 
211                {
212                    let k = k*BASE;
213
214                    let t = clamped_sub(TMIN, k, bias, TMAX);
215
216                    if q < t 
217                    {
218                        break;
219                    }
220
221                    output.push(encode_digit(t + (q - t) % (BASE - t)));
222
223                    q = (q - t) / (BASE - t);
224                }
225
226                output.push(encode_digit(q));
227
228                bias = adapt(delta, h+1, h == b);
229                delta = 0;
230                h += 1;
231            }
232        }
233
234        delta += 1;
235        n += 1;
236    }
237
238    if add_xn == true
239    {
240        output.insert_str(0, "xn-");
241    }
242
243    return Ok(output);
244}
245
246
247
248fn adapt(delta: u32, numpoint: u32, firsttime: bool) -> u32 
249{
250    let mut delta = 
251        if firsttime 
252        {
253            delta / DAMP
254        }
255        else 
256        {
257            delta / 2
258        };
259
260    delta += delta / numpoint;
261    let mut k = 0;
262
263    while delta > (BASE - TMIN) * TMAX / 2 
264    {
265        delta /= BASE - TMIN;
266        k += BASE
267    }
268
269    k + (BASE - TMIN + 1) * delta / (delta + SKEW)
270}
271
/// Returns `lhs - rhs`, limited to the closed range `[min, max]`.
///
/// The bounds are tested by adding `rhs` to them, so the subtraction itself
/// is only evaluated when its result lies strictly between `min` and `max`.
fn clamped_sub<T>(min: T, lhs: T, rhs: T, max: T) -> T
where
    T: Ord + std::ops::Add<Output = T> + std::ops::Sub<Output = T> + Copy,
{
    if lhs <= min + rhs {
        return min;
    }

    if lhs >= max + rhs {
        return max;
    }

    lhs - rhs
}
292
293fn decode_digit(c: char) -> u32 
294{
295    let cp = c as u32;
296
297    match c 
298    {
299        '0' ..= '9' => cp - ('0' as u32) + 26,
300        'A' ..= 'Z' => cp - ('A' as u32),
301        'a' ..= 'z' => cp - ('a' as u32),
302        _ => BASE,
303    }
304}
305
/// Maps a Punycode digit value to its lowercase character.
///
/// Values `0..=25` map to `'a'..='z'` and values `26..=35` to `'0'..='9'`.
///
/// # Panics
/// Panics when `d` is not a valid digit (`d >= 36`). The encoder never
/// produces such a value, so this indicates a broken invariant.
fn encode_digit(d: u32) -> char {
    // Matching on the ranges (instead of computing a byte and truncating it)
    // guarantees that an out-of-range value can never wrap into a valid digit.
    match d {
        0..=25 => char::from(b'a' + d as u8),
        26..=35 => char::from(b'0' + (d - 26) as u8),
        _ => panic!("punycode digit out of range: d = {}", d),
    }
}
314
#[cfg(test)]
mod tests {
    use super::*;

    /// Pairs of `(unicode, punycode)`; the Punycode form carries no `xn--` prefix.
    static TESTS: &[(&str, &str)] = &[
        // examples taken from [RFC-3492, section 7.1](https://tools.ietf.org/html/rfc3492#section-7.1)
        (
            "\u{0644}\u{064A}\u{0647}\u{0645}\u{0627}\u{0628}\u{062A}\u{0643}\u{0644}\
            \u{0645}\u{0648}\u{0634}\u{0639}\u{0631}\u{0628}\u{064A}\u{061F}",
            "egbpdaj6bu4bxfgehfvwxn",
        ),
        (
            "\u{4ED6}\u{4EEC}\u{4E3A}\u{4EC0}\u{4E48}\u{4E0D}\u{8BF4}\u{4E2D}\u{6587}",
            "ihqwcrb4cv8a8dqg056pqjye",
        ),
        (
            "\u{4ED6}\u{5011}\u{7232}\u{4EC0}\u{9EBD}\u{4E0D}\u{8AAA}\u{4E2D}\u{6587}",
            "ihqwctvzc91f659drss3x8bo0yb",
        ),
        (
            "\u{0050}\u{0072}\u{006F}\u{010D}\u{0070}\u{0072}\u{006F}\u{0073}\u{0074}\
            \u{011B}\u{006E}\u{0065}\u{006D}\u{006C}\u{0075}\u{0076}\u{00ED}\u{010D}\
            \u{0065}\u{0073}\u{006B}\u{0079}",
            "Proprostnemluvesky-uyb24dma41a",
        ),
        (
            "\u{05DC}\u{05DE}\u{05D4}\u{05D4}\u{05DD}\u{05E4}\u{05E9}\u{05D5}\u{05D8}\
            \u{05DC}\u{05D0}\u{05DE}\u{05D3}\u{05D1}\u{05E8}\u{05D9}\u{05DD}\u{05E2}\
            \u{05D1}\u{05E8}\u{05D9}\u{05EA}",
            "4dbcagdahymbxekheh6e0a7fei0b",
        ),
        (
            "\u{092F}\u{0939}\u{0932}\u{094B}\u{0917}\u{0939}\u{093F}\u{0928}\u{094D}\
            \u{0926}\u{0940}\u{0915}\u{094D}\u{092F}\u{094B}\u{0902}\u{0928}\u{0939}\
            \u{0940}\u{0902}\u{092C}\u{094B}\u{0932}\u{0938}\u{0915}\u{0924}\u{0947}\
            \u{0939}\u{0948}\u{0902}",
            "i1baa7eci9glrd9b2ae1bj0hfcgg6iyaf8o0a1dig0cd",
        ),
        (
            "\u{306A}\u{305C}\u{307F}\u{3093}\u{306A}\u{65E5}\u{672C}\u{8A9E}\u{3092}\
            \u{8A71}\u{3057}\u{3066}\u{304F}\u{308C}\u{306A}\u{3044}\u{306E}\u{304B}",
            "n8jok5ay5dzabd5bym9f0cm5685rrjetr6pdxa",
        ),
        (
            "\u{C138}\u{ACC4}\u{C758}\u{BAA8}\u{B4E0}\u{C0AC}\u{B78C}\u{B4E4}\u{C774}\
            \u{D55C}\u{AD6D}\u{C5B4}\u{B97C}\u{C774}\u{D574}\u{D55C}\u{B2E4}\u{BA74}\
            \u{C5BC}\u{B9C8}\u{B098}\u{C88B}\u{C744}\u{AE4C}",
            "989aomsvi5e83db1d2a355cv1e0vak1dwrv93d5xbh15a0dt30a5jpsd879ccm6fea98c",
        ),
        (
            "\u{043F}\u{043E}\u{0447}\u{0435}\u{043C}\u{0443}\u{0436}\u{0435}\u{043E}\
            \u{043D}\u{0438}\u{043D}\u{0435}\u{0433}\u{043E}\u{0432}\u{043E}\u{0440}\
            \u{044F}\u{0442}\u{043F}\u{043E}\u{0440}\u{0443}\u{0441}\u{0441}\u{043A}\
            \u{0438}",
            "b1abfaaepdrnnbgefbaDotcwatmq2g4l",
        ),
        (
            "\u{0050}\u{006F}\u{0072}\u{0071}\u{0075}\u{00E9}\u{006E}\u{006F}\u{0070}\
            \u{0075}\u{0065}\u{0064}\u{0065}\u{006E}\u{0073}\u{0069}\u{006D}\u{0070}\
            \u{006C}\u{0065}\u{006D}\u{0065}\u{006E}\u{0074}\u{0065}\u{0068}\u{0061}\
            \u{0062}\u{006C}\u{0061}\u{0072}\u{0065}\u{006E}\u{0045}\u{0073}\u{0070}\
            \u{0061}\u{00F1}\u{006F}\u{006C}",
            "PorqunopuedensimplementehablarenEspaol-fmd56a",
        ),
        (
            "\u{0054}\u{1EA1}\u{0069}\u{0073}\u{0061}\u{006F}\u{0068}\u{1ECD}\u{006B}\
            \u{0068}\u{00F4}\u{006E}\u{0067}\u{0074}\u{0068}\u{1EC3}\u{0063}\u{0068}\
            \u{1EC9}\u{006E}\u{00F3}\u{0069}\u{0074}\u{0069}\u{1EBF}\u{006E}\u{0067}\
            \u{0056}\u{0069}\u{1EC7}\u{0074}",
            "TisaohkhngthchnitingVit-kjcr8268qyxafd2f1b9g",
        ),
        (
            "\u{0033}\u{5E74}\u{0042}\u{7D44}\u{91D1}\u{516B}\u{5148}\u{751F}",
            "3B-ww4c5e180e575a65lsy2b",
        ),
        (
            "\u{5B89}\u{5BA4}\u{5948}\u{7F8E}\u{6075}\u{002D}\u{0077}\u{0069}\u{0074}\
            \u{0068}\u{002D}\u{0053}\u{0055}\u{0050}\u{0045}\u{0052}\u{002D}\u{004D}\
            \u{004F}\u{004E}\u{004B}\u{0045}\u{0059}\u{0053}",
            "-with-SUPER-MONKEYS-pc58ag80a8qai00g7n9n",
        ),
        (
            "\u{0048}\u{0065}\u{006C}\u{006C}\u{006F}\u{002D}\u{0041}\u{006E}\u{006F}\
            \u{0074}\u{0068}\u{0065}\u{0072}\u{002D}\u{0057}\u{0061}\u{0079}\u{002D}\
            \u{305D}\u{308C}\u{305E}\u{308C}\u{306E}\u{5834}\u{6240}",
            "Hello-Another-Way--fc4qua05auwb3674vfr0b",
        ),
        (
            "\u{3072}\u{3068}\u{3064}\u{5C4B}\u{6839}\u{306E}\u{4E0B}\u{0032}",
            "2-u9tlzr9756bt3uc0v",
        ),
        (
            "\u{004D}\u{0061}\u{006A}\u{0069}\u{3067}\u{004B}\u{006F}\u{0069}\u{3059}\
            \u{308B}\u{0035}\u{79D2}\u{524D}",
            "MajiKoi5-783gue6qz075azm5e",
        ),
        (
            "\u{30D1}\u{30D5}\u{30A3}\u{30FC}\u{0064}\u{0065}\u{30EB}\u{30F3}\u{30D0}",
            "de-jg4avhby1noc0d",
        ),
        (
            "\u{305D}\u{306E}\u{30B9}\u{30D4}\u{30FC}\u{30C9}\u{3067}",
            "d9juau41awczczp",
        ),
        (
            "\u{002D}\u{003E}\u{0020}\u{0024}\u{0031}\u{002E}\u{0030}\u{0030}\u{0020}\
            \u{003C}\u{002D}",
            "-> $1.00 <--",
        ),

        // some real-life examples
        ("académie-française", "acadmie-franaise-npb1a"),
        ("bücher", "bcher-kva"),
        ("république-numérique", "rpublique-numrique-bwbm"),

        // some real-life TLD
        ("бг", "90ae"),
        ("рф", "p1ai"),
        ("укр", "j1amh"),
        ("السعودية", "mgberp4a5d4ar"),
        ("امارات", "mgbaam7a8h"),
        ("مصر", "wgbh1c"),
        ("中国", "fiqs8s"),
        ("中國", "fiqz9s"),
        ("台湾", "kprw13d"),
        ("台灣", "kpry57d"),
        ("香港", "j6w193g"),

        // other
        ("", ""),
        ("a", "a-"),
        ("0", "0-"),
        ("A", "A-"),
        ("é", "9ca"),
        ("\n", "\n-"),
    ];

    /// Every Punycode vector decodes to its Unicode counterpart.
    #[test]
    fn test_decode() {
        for (plain, encoded) in TESTS {
            assert_eq!(decode(encoded), Ok(String::from(*plain)));
        }
    }

    /// Every Unicode vector encodes to its Punycode counterpart (compared
    /// case-insensitively, since the encoder only emits lowercase digits).
    #[test]
    fn test_encode() {
        for (plain, encoded) in TESTS {
            assert_eq!(
                encode(plain, false).unwrap().to_lowercase(),
                encoded.to_lowercase()
            );
        }
    }

    /// Malformed inputs are rejected instead of being decoded.
    #[test]
    fn test_fail_decode() {
        assert!(decode("bcher-kva.ch").is_err());
        assert!(decode("+").is_err());
        assert!(decode("\\").is_err());
        assert!(decode("é").is_err());
        assert!(decode("99999999").is_err());
    }

    /// A pure ASCII string survives an encode/decode round trip.
    #[test]
    fn test_decode_1() {
        let encoded = encode("test", false).unwrap();
        println!("{}", encoded);

        let decoded = decode(&encoded).unwrap();

        assert_eq!(decoded, "test");
    }

    /// Round trip of a Japanese label including the `xn--` prefix.
    #[test]
    fn test_encode_decode_idn_1() {
        let encoded = encode("テスト", true).unwrap();

        println!("{}", encoded);

        assert_eq!(encoded, "xn--zckzah");

        let decoded = decode(&encoded).unwrap();

        assert_eq!(decoded, "テスト");
    }

    /// Round trip of a Cyrillic label including the `xn--` prefix.
    #[test]
    fn test_encode_decode_idn_2() {
        let encoded = encode("испытание", true).unwrap();

        println!("{}", encoded);

        assert_eq!(encoded, "xn--80akhbyknj4f");

        let decoded = decode(&encoded).unwrap();

        assert_eq!(decoded, "испытание");
    }
}
498}