cdns_rs/external/rust_punycode/
mod.rs1use crate::{common, internal_error, CDnsErrorType, CDnsResult};
4
// Bootstring parameters used by the punycode encoder and decoder below
// (the values are those given for Punycode in RFC 3492, section 5).

/// Number of distinct digit values (`a`-`z` and `0`-`9`).
const BASE: u32 = 36;

/// Lower clamp of the per-digit threshold.
const TMIN: u32 = 1;

/// Upper clamp of the per-digit threshold.
const TMAX: u32 = 26;

/// Skew term used by `adapt` when computing the next bias.
const SKEW: u32 = 38;

/// Divisor applied to the very first delta passed to `adapt`.
const DAMP: u32 = 700;

/// Bias value both `encode` and `decode` start with.
const INITIAL_BIAS: u32 = 72;

/// First code point that is not copied literally (first non-ASCII value).
const INITIAL_N: u32 = 128;

/// Separates the literally copied characters from the encoded digits.
const DELIMITER: char = '-';
14
15pub
25fn decode(input: &str) -> CDnsResult<String>
26{
27 if !input.is_ascii()
28 {
29 internal_error!(CDnsErrorType::PunycodeNotAscii, "{} is not valid punycode",
30 common::sanitize_str_unicode(input));
31 }
32
33 let input =
34 if input.starts_with("xn-") == true
35 {
36 &input[3..]
37 }
38 else
39 {
40 input
41 };
42
43 let mut n = INITIAL_N;
44 let mut i = 0;
45 let mut bias = INITIAL_BIAS;
46
47 let (mut output, input) =
48 if let Some(i) = input.rfind(DELIMITER)
49 {
50 (input[0..i].chars().collect(), &input[i+1..])
51 }
52 else
53 {
54 (vec![], &input[..])
55 };
56
57 let mut it = input.chars().peekable();
58
59 while it.peek() != None
60 {
61 let oldi = i;
62 let mut w = 1;
63
64 for k in 1..
65 {
66 let c =
67 if let Some(c) = it.next()
68 {
69 c
70 }
71 else
72 {
73 internal_error!(CDnsErrorType::DnsResponse,
74 "{} punycode decode codepoint unexpected EOF, k={}", common::sanitize_str_unicode(input),
75 k);
76 };
77
78 let k = k*BASE;
79
80 let digit = decode_digit(c);
81
82 if digit == BASE
83 {
84 internal_error!(CDnsErrorType::DnsResponse,
85 "{} punycode decode, decoded digit '{}', eq BASE '{}'",
86 common::sanitize_str_unicode(input), digit, BASE);
87 }
88
89 if digit > (std::u32::MAX - i) / w
91 {
92 internal_error!(CDnsErrorType::DnsResponse,
93 "{} punycode decode, digit '{}' overflow {}",
94 common::sanitize_str_unicode(input), digit, digit > (std::u32::MAX - i) / w);
95 }
96
97 i += digit * w;
98
99 let t = clamped_sub(TMIN, k, bias, TMAX);
100 if digit < t
101 {
102 break;
103 }
104
105 if BASE > (std::u32::MAX - t) / w
107 {
108 internal_error!(CDnsErrorType::DnsResponse,
109 "{} punycode decode, BASE '{}' overflow {}",
110 common::sanitize_str_unicode(input), BASE, (std::u32::MAX - t) / w);
111 }
112
113 w *= BASE - t;
114 }
115
116 let len = (output.len() + 1) as u32;
117 bias = adapt(i - oldi, len, oldi == 0);
118
119 let il = i / len;
120
121 if n > std::u32::MAX - il
123 {
124 internal_error!(CDnsErrorType::DnsResponse,
125 "{} punycode decode, n '{}' overflow {}",
126 common::sanitize_str_unicode(input), n, std::u32::MAX - il);
127 }
128 n += il;
129 i %= len;
130
131 if let Some(c) = std::char::from_u32(n)
132 {
133 output.insert(i as usize, c);
134 }
135 else
136 {
137 internal_error!(CDnsErrorType::DnsResponse,
138 "{} punycode decode, cannot cast n={} to char",
139 common::sanitize_str_unicode(input), n);
140 }
141
142 i += 1;
143 }
144
145 Ok(output.iter().cloned().collect())
146}
147
148pub
158fn encode<I: AsRef<str>>(ref_input: I, add_xn: bool) -> CDnsResult<String>
159{
160 let input = ref_input.as_ref().chars().collect::<Vec<char>>();
161
162 let mut n = INITIAL_N;
163 let mut delta = 0;
164 let mut bias = INITIAL_BIAS;
165
166 let mut output =
167 input
168 .iter()
169 .filter(|&&c| c.is_ascii())
170 .cloned()
171 .collect::<String>();
172
173
174
175 let mut h = output.len() as u32;
176 let b = h;
177
178 if b > 0 || add_xn == true
179 {
180 output.push(DELIMITER)
181 }
182
183 while h < input.len() as u32
184 {
185 let m = *input.iter().filter(|&&c| (c as u32) >= n).min().unwrap() as u32;
186
187 if m - n > (std::u32::MAX - delta) / (h + 1)
188 {
189 internal_error!(CDnsErrorType::DnsResponse,
190 "{} punycode encode, m - n: {} overflow {} ",
191 common::sanitize_str_unicode(ref_input.as_ref()), m-n, (std::u32::MAX - delta) / (h + 1));
192 }
193
194 delta += (m - n) * (h + 1);
195
196 n = m;
197
198 for c in input.iter().map(|c| *c as u32)
199 {
200 if c < n
203 {
204 delta += 1;
205 }
206 else if c == n
207 {
208 let mut q = delta;
209
210 for k in 1..
211 {
212 let k = k*BASE;
213
214 let t = clamped_sub(TMIN, k, bias, TMAX);
215
216 if q < t
217 {
218 break;
219 }
220
221 output.push(encode_digit(t + (q - t) % (BASE - t)));
222
223 q = (q - t) / (BASE - t);
224 }
225
226 output.push(encode_digit(q));
227
228 bias = adapt(delta, h+1, h == b);
229 delta = 0;
230 h += 1;
231 }
232 }
233
234 delta += 1;
235 n += 1;
236 }
237
238 if add_xn == true
239 {
240 output.insert_str(0, "xn-");
241 }
242
243 return Ok(output);
244}
245
246
247
248fn adapt(delta: u32, numpoint: u32, firsttime: bool) -> u32
249{
250 let mut delta =
251 if firsttime
252 {
253 delta / DAMP
254 }
255 else
256 {
257 delta / 2
258 };
259
260 delta += delta / numpoint;
261 let mut k = 0;
262
263 while delta > (BASE - TMIN) * TMAX / 2
264 {
265 delta /= BASE - TMIN;
266 k += BASE
267 }
268
269 k + (BASE - TMIN + 1) * delta / (delta + SKEW)
270}
271
/// Calculates `lhs - rhs` limited to the range `min ..= max`.
///
/// The comparison is performed on `min + rhs` and `max + rhs`, so the
/// subtraction itself is evaluated only when its result lies strictly
/// between `min` and `max`.
///
/// # Returns
///
/// * `min` when `lhs <= min + rhs`
///
/// * `max` when `lhs >= max + rhs`
///
/// * `lhs - rhs` otherwise
fn clamped_sub<T>(min: T, lhs: T, rhs: T, max: T) -> T
where T : Ord
    + std::ops::Add<Output=T>
    + std::ops::Sub<Output=T>
    + Copy
{
    if lhs <= min + rhs
    {
        return min;
    }

    if lhs >= max + rhs
    {
        return max;
    }

    lhs - rhs
}
292
293fn decode_digit(c: char) -> u32
294{
295 let cp = c as u32;
296
297 match c
298 {
299 '0' ..= '9' => cp - ('0' as u32) + 26,
300 'A' ..= 'Z' => cp - ('A' as u32),
301 'a' ..= 'z' => cp - ('a' as u32),
302 _ => BASE,
303 }
304}
305
/// Converts a digit value to its punycode character.
///
/// Values `0 ..= 25` give `a`-`z`, values `26 ..= 35` give `0`-`9`.
///
/// # Panics
///
/// Panics when the resulting character is neither a decimal digit nor a
/// lowercase ASCII letter.
fn encode_digit(d: u32) -> char
{
    // 'a' is 97 (= 22 + 75) and '0' is 48 (= 26 + 22)
    let shift = if d < 26 { 22 + 75 } else { 22 };
    let r = char::from((d + shift) as u8);

    assert!(r.is_ascii_digit() || r.is_ascii_lowercase(), "r = {}", r);

    r
}
314
#[cfg(test)]
mod tests
{
    use super::*;

    /// Pairs of (plain text, punycode without any prefix).
    static TESTS: &[(&str, &str)] = &[
        // the first group mirrors the sample strings of RFC 3492, section 7.1
        ("\u{0644}\u{064A}\u{0647}\u{0645}\u{0627}\u{0628}\u{062A}\u{0643}\u{0644}\
          \u{0645}\u{0648}\u{0634}\u{0639}\u{0631}\u{0628}\u{064A}\u{061F}",
         "egbpdaj6bu4bxfgehfvwxn"),

        ("\u{4ED6}\u{4EEC}\u{4E3A}\u{4EC0}\u{4E48}\u{4E0D}\u{8BF4}\u{4E2D}\u{6587}",
         "ihqwcrb4cv8a8dqg056pqjye"),

        ("\u{4ED6}\u{5011}\u{7232}\u{4EC0}\u{9EBD}\u{4E0D}\u{8AAA}\u{4E2D}\u{6587}",
         "ihqwctvzc91f659drss3x8bo0yb"),

        ("\u{0050}\u{0072}\u{006F}\u{010D}\u{0070}\u{0072}\u{006F}\u{0073}\u{0074}\
          \u{011B}\u{006E}\u{0065}\u{006D}\u{006C}\u{0075}\u{0076}\u{00ED}\u{010D}\
          \u{0065}\u{0073}\u{006B}\u{0079}",
         "Proprostnemluvesky-uyb24dma41a"),

        ("\u{05DC}\u{05DE}\u{05D4}\u{05D4}\u{05DD}\u{05E4}\u{05E9}\u{05D5}\u{05D8}\
          \u{05DC}\u{05D0}\u{05DE}\u{05D3}\u{05D1}\u{05E8}\u{05D9}\u{05DD}\u{05E2}\
          \u{05D1}\u{05E8}\u{05D9}\u{05EA}",
         "4dbcagdahymbxekheh6e0a7fei0b"),

        ("\u{092F}\u{0939}\u{0932}\u{094B}\u{0917}\u{0939}\u{093F}\u{0928}\u{094D}\
          \u{0926}\u{0940}\u{0915}\u{094D}\u{092F}\u{094B}\u{0902}\u{0928}\u{0939}\
          \u{0940}\u{0902}\u{092C}\u{094B}\u{0932}\u{0938}\u{0915}\u{0924}\u{0947}\
          \u{0939}\u{0948}\u{0902}",
         "i1baa7eci9glrd9b2ae1bj0hfcgg6iyaf8o0a1dig0cd"),

        ("\u{306A}\u{305C}\u{307F}\u{3093}\u{306A}\u{65E5}\u{672C}\u{8A9E}\u{3092}\
          \u{8A71}\u{3057}\u{3066}\u{304F}\u{308C}\u{306A}\u{3044}\u{306E}\u{304B}",
         "n8jok5ay5dzabd5bym9f0cm5685rrjetr6pdxa"),

        ("\u{C138}\u{ACC4}\u{C758}\u{BAA8}\u{B4E0}\u{C0AC}\u{B78C}\u{B4E4}\u{C774}\
          \u{D55C}\u{AD6D}\u{C5B4}\u{B97C}\u{C774}\u{D574}\u{D55C}\u{B2E4}\u{BA74}\
          \u{C5BC}\u{B9C8}\u{B098}\u{C88B}\u{C744}\u{AE4C}",
         "989aomsvi5e83db1d2a355cv1e0vak1dwrv93d5xbh15a0dt30a5jpsd879ccm6fea98c"),

        ("\u{043F}\u{043E}\u{0447}\u{0435}\u{043C}\u{0443}\u{0436}\u{0435}\u{043E}\
          \u{043D}\u{0438}\u{043D}\u{0435}\u{0433}\u{043E}\u{0432}\u{043E}\u{0440}\
          \u{044F}\u{0442}\u{043F}\u{043E}\u{0440}\u{0443}\u{0441}\u{0441}\u{043A}\
          \u{0438}",
         "b1abfaaepdrnnbgefbaDotcwatmq2g4l"),

        ("\u{0050}\u{006F}\u{0072}\u{0071}\u{0075}\u{00E9}\u{006E}\u{006F}\u{0070}\
          \u{0075}\u{0065}\u{0064}\u{0065}\u{006E}\u{0073}\u{0069}\u{006D}\u{0070}\
          \u{006C}\u{0065}\u{006D}\u{0065}\u{006E}\u{0074}\u{0065}\u{0068}\u{0061}\
          \u{0062}\u{006C}\u{0061}\u{0072}\u{0065}\u{006E}\u{0045}\u{0073}\u{0070}\
          \u{0061}\u{00F1}\u{006F}\u{006C}",
         "PorqunopuedensimplementehablarenEspaol-fmd56a"),

        ("\u{0054}\u{1EA1}\u{0069}\u{0073}\u{0061}\u{006F}\u{0068}\u{1ECD}\u{006B}\
          \u{0068}\u{00F4}\u{006E}\u{0067}\u{0074}\u{0068}\u{1EC3}\u{0063}\u{0068}\
          \u{1EC9}\u{006E}\u{00F3}\u{0069}\u{0074}\u{0069}\u{1EBF}\u{006E}\u{0067}\
          \u{0056}\u{0069}\u{1EC7}\u{0074}",
         "TisaohkhngthchnitingVit-kjcr8268qyxafd2f1b9g"),

        ("\u{0033}\u{5E74}\u{0042}\u{7D44}\u{91D1}\u{516B}\u{5148}\u{751F}",
         "3B-ww4c5e180e575a65lsy2b"),

        ("\u{5B89}\u{5BA4}\u{5948}\u{7F8E}\u{6075}\u{002D}\u{0077}\u{0069}\u{0074}\
          \u{0068}\u{002D}\u{0053}\u{0055}\u{0050}\u{0045}\u{0052}\u{002D}\u{004D}\
          \u{004F}\u{004E}\u{004B}\u{0045}\u{0059}\u{0053}",
         "-with-SUPER-MONKEYS-pc58ag80a8qai00g7n9n"),

        ("\u{0048}\u{0065}\u{006C}\u{006C}\u{006F}\u{002D}\u{0041}\u{006E}\u{006F}\
          \u{0074}\u{0068}\u{0065}\u{0072}\u{002D}\u{0057}\u{0061}\u{0079}\u{002D}\
          \u{305D}\u{308C}\u{305E}\u{308C}\u{306E}\u{5834}\u{6240}",
         "Hello-Another-Way--fc4qua05auwb3674vfr0b"),

        ("\u{3072}\u{3068}\u{3064}\u{5C4B}\u{6839}\u{306E}\u{4E0B}\u{0032}",
         "2-u9tlzr9756bt3uc0v"),

        ("\u{004D}\u{0061}\u{006A}\u{0069}\u{3067}\u{004B}\u{006F}\u{0069}\u{3059}\
          \u{308B}\u{0035}\u{79D2}\u{524D}",
         "MajiKoi5-783gue6qz075azm5e"),

        ("\u{30D1}\u{30D5}\u{30A3}\u{30FC}\u{0064}\u{0065}\u{30EB}\u{30F3}\u{30D0}",
         "de-jg4avhby1noc0d"),

        ("\u{305D}\u{306E}\u{30B9}\u{30D4}\u{30FC}\u{30C9}\u{3067}",
         "d9juau41awczczp"),

        ("\u{002D}\u{003E}\u{0020}\u{0024}\u{0031}\u{002E}\u{0030}\u{0030}\u{0020}\
          \u{003C}\u{002D}",
         "-> $1.00 <--"),

        // words mixing ASCII and accented letters
        ("académie-française", "acadmie-franaise-npb1a"),
        ("bücher", "bcher-kva"),
        ("république-numérique", "rpublique-numrique-bwbm"),

        // short labels without any ASCII character
        ("бг", "90ae"),
        ("рф", "p1ai"),
        ("укр", "j1amh"),
        ("السعودية", "mgberp4a5d4ar"),
        ("امارات", "mgbaam7a8h"),
        ("مصر", "wgbh1c"),
        ("中国", "fiqs8s"),
        ("中國", "fiqz9s"),
        ("台湾", "kprw13d"),
        ("台灣", "kpry57d"),
        ("香港", "j6w193g"),

        // corner cases
        ("", ""),
        ("a", "a-"),
        ("0", "0-"),
        ("A", "A-"),
        ("é", "9ca"),
        ("\n", "\n-"),
    ];

    /// Every punycode sample must decode to its plain text.
    #[test]
    fn test_decode()
    {
        for &(plain, punycode) in TESTS
        {
            assert_eq!(decode(punycode), Ok(plain.to_string()));
        }
    }

    /// Every plain text must encode to its punycode sample (case is ignored).
    #[test]
    fn test_encode()
    {
        for &(plain, punycode) in TESTS
        {
            let encoded = encode(plain, false).unwrap();

            assert_eq!(encoded.to_lowercase(), punycode.to_lowercase());
        }
    }

    /// Malformed input must be rejected.
    #[test]
    fn test_fail_decode()
    {
        let malformed = ["bcher-kva.ch", "+", "\\", "é", "99999999"];

        for bad in malformed.iter()
        {
            assert!(decode(bad).is_err());
        }
    }

    /// A pure ASCII string survives an encode/decode round trip.
    #[test]
    fn test_decode_1()
    {
        let enc = encode("test", false).unwrap();
        println!("{}", enc);

        let dec = decode(&enc).unwrap();

        assert_eq!(dec, "test");
    }

    /// Round trip with the prefix requested (katakana).
    #[test]
    fn test_encode_decode_idn_1()
    {
        let enc2 = encode("テスト", true).unwrap();
        println!("{}", enc2);
        assert_eq!(enc2, "xn--zckzah");

        let dec2 = decode(enc2.as_str()).unwrap();
        assert_eq!(dec2, "テスト");
    }

    /// Round trip with the prefix requested (cyrillic).
    #[test]
    fn test_encode_decode_idn_2()
    {
        let enc2 = encode("испытание", true).unwrap();
        println!("{}", enc2);
        assert_eq!(enc2, "xn--80akhbyknj4f");

        let dec2 = decode(enc2.as_str()).unwrap();
        assert_eq!(dec2, "испытание");
    }
}