import codecs
def segregate(str):
base = bytearray()
extended = set()
for c in str:
if ord(c) < 128:
base.append(ord(c))
else:
extended.add(c)
extended = sorted(extended)
return bytes(base), extended
def selective_len(str, max):
res = 0
for c in str:
if ord(c) < max:
res += 1
return res
def selective_find(str, char, index, pos):
l = len(str)
while 1:
pos += 1
if pos == l:
return (-1, -1)
c = str[pos]
if c == char:
return index+1, pos
elif c < char:
index += 1
def insertion_unsort(str, extended):
oldchar = 0x80
result = []
oldindex = -1
for c in extended:
index = pos = -1
char = ord(c)
curlen = selective_len(str, char)
delta = (curlen+1) * (char - oldchar)
while 1:
index,pos = selective_find(str,c,index,pos)
if index == -1:
break
delta += index - oldindex
result.append(delta-1)
oldindex = index
delta = 0
oldchar = char
return result
def T(j, bias):
res = 36 * (j + 1) - bias
if res < 1: return 1
if res > 26: return 26
return res
digits = b"abcdefghijklmnopqrstuvwxyz0123456789"
def generate_generalized_integer(N, bias):
result = bytearray()
j = 0
while 1:
t = T(j, bias)
if N < t:
result.append(digits[N])
return bytes(result)
result.append(digits[t + ((N - t) % (36 - t))])
N = (N - t) // (36 - t)
j += 1
def adapt(delta, first, numchars):
if first:
delta //= 700
else:
delta //= 2
delta += delta // numchars
divisions = 0
while delta > 455:
delta = delta // 35 divisions += 36
bias = divisions + (36 * delta // (delta + 38))
return bias
def generate_integers(baselen, deltas):
result = bytearray()
bias = 72
for points, delta in enumerate(deltas):
s = generate_generalized_integer(delta, bias)
result.extend(s)
bias = adapt(delta, points==0, baselen+points+1)
return bytes(result)
def punycode_encode(text):
base, extended = segregate(text)
deltas = insertion_unsort(text, extended)
extended = generate_integers(len(base), deltas)
if base:
return base + b"-" + extended
return extended
def decode_generalized_number(extended, extpos, bias, errors):
result = 0
w = 1
j = 0
while 1:
try:
char = extended[extpos]
except IndexError:
if errors == "strict":
raise UnicodeDecodeError("punycode", extended, extpos, extpos+1,
"incomplete punycode string")
return extpos + 1, None
extpos += 1
if 0x41 <= char <= 0x5A: digit = char - 0x41
elif 0x30 <= char <= 0x39:
digit = char - 22 elif errors == "strict":
raise UnicodeDecodeError("punycode", extended, extpos-1, extpos,
f"Invalid extended code point '{extended[extpos-1]}'")
else:
return extpos, None
t = T(j, bias)
result += digit * w
if digit < t:
return extpos, result
w = w * (36 - t)
j += 1
def insertion_sort(base, extended, errors):
char = 0x80
pos = -1
bias = 72
extpos = 0
while extpos < len(extended):
newpos, delta = decode_generalized_number(extended, extpos,
bias, errors)
if delta is None:
return base
pos += delta+1
char += pos // (len(base) + 1)
if char > 0x10FFFF:
if errors == "strict":
raise UnicodeDecodeError(
"punycode", extended, pos-1, pos,
f"Invalid character U+{char:x}")
char = ord('?')
pos = pos % (len(base) + 1)
base = base[:pos] + chr(char) + base[pos:]
bias = adapt(delta, (extpos == 0), len(base))
extpos = newpos
return base
def punycode_decode(text, errors):
if isinstance(text, str):
text = text.encode("ascii")
if isinstance(text, memoryview):
text = bytes(text)
pos = text.rfind(b"-")
if pos == -1:
base = ""
extended = text.upper()
else:
try:
base = str(text[:pos], "ascii", errors)
except UnicodeDecodeError as exc:
raise UnicodeDecodeError("ascii", text, exc.start, exc.end,
exc.reason) from None
extended = text[pos+1:].upper()
try:
return insertion_sort(base, extended, errors)
except UnicodeDecodeError as exc:
offset = pos + 1
raise UnicodeDecodeError("punycode", text,
offset+exc.start, offset+exc.end,
exc.reason) from None
class Codec(codecs.Codec):
def encode(self, input, errors='strict'):
res = punycode_encode(input)
return res, len(input)
def decode(self, input, errors='strict'):
if errors not in ('strict', 'replace', 'ignore'):
raise UnicodeError(f"Unsupported error handling: {errors}")
res = punycode_decode(input, errors)
return res, len(input)
class IncrementalEncoder(codecs.IncrementalEncoder):
def encode(self, input, final=False):
return punycode_encode(input)
class IncrementalDecoder(codecs.IncrementalDecoder):
def decode(self, input, final=False):
if self.errors not in ('strict', 'replace', 'ignore'):
raise UnicodeError(f"Unsupported error handling: {self.errors}")
return punycode_decode(input, self.errors)
class StreamWriter(Codec,codecs.StreamWriter):
pass
class StreamReader(Codec,codecs.StreamReader):
pass
def getregentry():
return codecs.CodecInfo(
name='punycode',
encode=Codec().encode,
decode=Codec().decode,
incrementalencoder=IncrementalEncoder,
incrementaldecoder=IncrementalDecoder,
streamwriter=StreamWriter,
streamreader=StreamReader,
)