from ascii_braille import ascii_to_unicode
def write_ueb_yaml(in_file, out_file):
    """Copy a YAML file behind a ``---`` header and append the generated tables.

    Args:
        in_file: Path of the UTF-8 file whose lines are copied verbatim.
        out_file: Path of the file to create or overwrite (written as UTF-8).
    """
    # The output file is opened first, so it is created/truncated even when
    # the input file cannot be opened.
    with open(out_file, 'w', encoding="utf8") as out_stream, \
            open(in_file, encoding="utf-8") as yaml_in_file:
        out_stream.write("---\n")
        out_stream.writelines(yaml_in_file)
        write_letters_and_digits(out_stream)
def code_point(list):
    """Return element 2 of a character row.

    Going by this function's name that column holds the code point -- TODO
    confirm against the layout of the rows that are passed in.
    """
    column = 2
    return list[column]
def unicode_char(list):
    """Return element 0 of a character row, backslash-escaped if necessary.

    A double quote or a backslash gets a leading backslash; every other
    value is returned unchanged.
    """
    char = list[0]
    needs_escape = char in ('"', '\\')
    return "\\" + char if needs_escape else char
def unicode_name(list):
    """Return element 3 of a character row.

    Going by this function's name that column holds the character's name --
    TODO confirm against the layout of the rows that are passed in.
    """
    column = 3
    return list[column]
def ueb(list):
    """Build the braille string for one character row.

    Args:
        list: A row whose element 0 is the character and whose element 1 is
            its ASCII-braille spelling (must be non-empty).

    Returns:
        The spelling converted with ascii_to_unicode(), preceded by ``'1'``
        when the "grade 1" condition below holds and followed by ``'t'``
        when the spelling starts with ``$``, ``.$`` or ``_$``.  If the
        spelling cannot be converted a message is printed and only the
        prefix/suffix parts are returned.
    """
    ascii_braille = list[1]
    first_char = ascii_braille[0]
    char = unicode_char(list)

    plain_ascii = char.isascii() and char not in ['?', '.', ',', ';', ':']
    starts_with_indicator = first_char in '#@^._";,'
    ellipsis_or_prime = char in "…′″‴"

    braille = ''
    if not (plain_ascii or starts_with_indicator or ellipsis_or_prime):
        # The '1' prefix belongs with the message: it is only wanted for the
        # rows that are reported here, not for every row.
        print("Adding grade 1 to: ", list)
        braille = '1'

    try:
        braille += ascii_to_unicode(ascii_braille)
    except Exception:
        # Best effort: report the bad spelling and carry on.  Catching
        # Exception (not everything) keeps Ctrl-C and SystemExit working.
        print("Illegal ASCII braille: '{}'".format(ascii_braille))

    if ascii_braille.startswith(('$', '.$', '_$')):
        braille += 't'
    return braille
def write_yaml_line(out_stream, char, nemeth, hex, unicode_name):
    """Write one ``- "char": [t: "braille"]`` entry with a trailing comment.

    Args:
        out_stream: Writable text stream.
        char: The character used as the entry's key.
        nemeth: The braille text placed inside ``[t: "..."]``.
        hex: Code point text written after ``0x`` in the comment.
        unicode_name: Name shown in parentheses in the comment; an empty
            string leaves the parenthesised part out.
    """
    entry = ' - "{}": [t: "{}"]'.format(char, nemeth)
    if unicode_name == "":
        note = ""
    else:
        note = '(' + unicode_name + ')'
    # The entry is padded to 32 columns so the comments line up.
    padded = '{:32}'.format(entry)
    out_stream.write(padded + '# 0x{} {}\n'.format(hex, note))
def write_letters_and_digits(out_stream):
    """Write the braille entries for digits and every Latin/Greek alphabet variant.

    Each alphabet that occupies a run of consecutive code points is emitted
    with write_letter_chars() starting from its first character; letters
    that Unicode keeps outside those runs are written one by one with
    write_yaml_line().  The output order below is significant.

    Args:
        out_stream: Writable text stream that receives the YAML lines.
    """
    digits = ["⠚", "⠁", "⠃", "⠉", "⠙", "⠑", "⠋", "⠛", "⠓", "⠊"]
    small_latin = ["a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m",
                   "n", "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z"]
    # One cell per code point from alpha to omega.  Unicode places final
    # sigma between rho and sigma, so that slot needs its own cell or every
    # letter after rho is written against the wrong code point.  Final sigma
    # is given the same cell as sigma here -- TODO confirm against the UEB
    # tables.
    small_greek = ["a", "b", "g", "d", "e", "z", "⠱", "⠹", "i", "k", "l", "m", "n",
                   "x", "o", "p", "r", "s", "s", "t", "u", "f", "⠯", "y", "w"]
    final_sigma_slot = 17

    # Convert once; write_letter_chars() only reads these lists.
    latin = my_ascii_to_unicode(small_latin)
    greek = my_ascii_to_unicode(small_greek)
    theta = greek[7]
    # Basic Greek capitals: the slot after capital rho (U+03A2) is
    # unassigned, and an empty cell makes write_letter_chars() skip it.
    capital_greek = greek[:final_sigma_slot] + [""] + greek[final_sigma_slot + 1:]
    # Mathematical capital alphabets hold the capital theta symbol there.
    math_capital_greek = greek[:final_sigma_slot] + [theta] + greek[final_sigma_slot + 1:]

    def write_latin(alphabets):
        for first_char, prefix in alphabets:
            write_letter_chars(out_stream, latin, first_char, prefix)

    def write_named(entries):
        # The code point in the comment is derived from the character so the
        # two cannot disagree.
        for char, cells, name in entries:
            write_yaml_line(out_stream, char, cells, format(ord(char), "x"), name)

    write_letter_chars(out_stream, digits, '0', "N")
    write_latin((('a', "L"), ('A', "CL")))
    write_letter_chars(out_stream, greek, 'α', "GL")
    write_letter_chars(out_stream, capital_greek, 'Α', "CGL")
    write_special_greeks(out_stream)

    write_latin((
        ('𝐚', "BL"), ('𝐀', "BCL"),
        ('𝑎', "IL"), ('𝐴', "ICL"),
        ('𝒂', "BIL"), ('𝑨', "BICL"),
        ('𝒶', "TsL"), ('𝒜', "TsCL"),
    ))
    # NOTE(review): these entries use a "T..." prefix while the ranged script
    # alphabets above use "Ts...", and the first two cells carry a trailing
    # ⠁ -- confirm both are intended.
    write_named((
        ("ℊ", "TL⠛⠁", "Script Small G"),
        ("ℋ", "TCL⠓⠁", "Script Capital H"),
        ("ℒ", "TCL⠇", "Script Capital L"),
        ("ℓ", "TL⠇", "Script Small L"),
        ("℘", "TCL⠏", "Script Capital P"),
        ("ℛ", "TCL⠗", "Script Capital R"),
        ("ℯ", "TL⠑", "Script Small E"),
        ("ℰ", "TCL⠑", "Script Capital E"),
        ("ℱ", "TCL⠋", "Script Capital F"),
        ("ℳ", "TCL⠍", "Script Capital M"),
        ("ℴ", "TL⠕", "Script Small O"),
    ))

    write_latin((
        ('𝓪', "BTsL"), ('𝓐', "BTsCL"),
        ('𝔞', "DL"), ('𝔄', "DCL"),
    ))
    write_named((
        ("ℌ", "DCL⠓", "Fraktur Capital H"),
        ("ℑ", "DCL⠊", "Fraktur Capital I"),
        ("ℜ", "DCL⠗", "Fraktur Capital R"),
        ("ℨ", "DCL⠵", "Fraktur Capital Z"),
        ("ℭ", "DCL⠉", "Fraktur Capital C"),
    ))

    write_latin((('𝕒', "𝔹L"),))
    # Double-struck capitals A-Z are not one contiguous run, so they are
    # paired with their cells explicitly (26 characters, 26 cells).
    double_struck_capitals = "𝔸𝔹ℂ𝔻𝔼𝔽𝔾ℍ𝕀𝕁𝕂𝕃𝕄ℕ𝕆ℙℚℝ𝕊𝕋𝕌𝕍𝕎𝕏𝕐ℤ"
    capital_cells = "⠁⠃⠉⠙⠑⠋⠛⠓⠊⠚⠅⠇⠍⠝⠕⠏⠟⠗⠎⠞⠥⠧⠺⠭⠽⠵"
    write_named(
        (char, "𝔹CL" + cell, "")
        for char, cell in zip(double_struck_capitals, capital_cells)
    )

    write_latin((
        ('𝖆', "BDL"), ('𝕬', "BDCL"),
        ('𝖺', "SL"), ('𝖠', "SCL"),
        ('𝗮', "BSL"), ('𝗔', "BSCL"),
        ('𝘢', "ISL"), ('𝘈', "ISCL"),
        # Same marker order as the other sans-serif prefixes (..S before L).
        ('𝙖', "BISL"), ('𝘼', "BISCL"),
        ('𝚊', "L"), ('𝙰', "CL"),
    ))
    write_named((
        ("𝚤", "L⠊", "dotless i"),
        ("𝚥", "L⠚", "dotless j"),
    ))

    for small_first, small_prefix, capital_first, capital_prefix in (
        ('𝛂', "BGL", '𝚨', "BCGL"),
        ('𝛼', "IGL", '𝛢', "ICGL"),
        ('𝜶', "BIGL", '𝜜', "BICGL"),
        ('𝝰', "BSGL", '𝝖', "BSCGL"),
        ('𝞪', "BISGL", '𝞐', "BISCGL"),
    ):
        write_letter_chars(out_stream, greek, small_first, small_prefix)
        write_letter_chars(out_stream, math_capital_greek, capital_first, capital_prefix)

    for first_digit, prefix in (
        ('𝟎', "BN"), ('𝟘', "𝔹N"), ('𝟢', "SN"), ('𝟬', "BSN"), ('𝟶', "N"),
    ):
        write_letter_chars(out_stream, digits, first_digit, prefix)

    write_vulgar_fractions(out_stream)
def write_letter_chars(out_stream, list, first_char, prefix):
    """Write one entry per consecutive code point, then a blank line.

    Args:
        out_stream: Writable text stream.
        list: Braille cells; entry ``i`` belongs to the code point ``i``
            places after ``first_char``.  Empty strings are skipped.
        first_char: Character at which the run of code points starts.
        prefix: Text put in front of every cell.
    """
    for offset, cells in enumerate(list):
        code = ord(first_char) + offset
        if cells == "":
            continue
        write_yaml_line(out_stream, chr(code), prefix + cells, format(code, "x"), "")
    out_stream.write("\n")
def my_ascii_to_unicode(list_of_chars):
    """Convert the lower-case entries of a list with ascii_to_unicode().

    Entries for which ``str.islower()`` is false (empty strings and entries
    that are already braille cells, for instance) are copied unchanged.

    Returns:
        A new list of the same length as *list_of_chars*.
    """
    return [
        ascii_to_unicode(ch) if ch.islower() else ch
        for ch in list_of_chars
    ]
def write_special_greeks(out_stream):
    """Write the Greek look-alike symbols that sit outside the Greek alphabet runs.

    Each symbol is written as the Greek letter named in its description,
    followed by a blank line.

    Args:
        out_stream: Writable text stream that receives the YAML lines.
    """
    entries = (
        # (character, prefix, ASCII-braille letter, code point, description)
        ("µ", "GL", "m", "00B5", "Micro (Greek mu)"),
        # Omega is "w" in ASCII braille; "m" is mu.
        ("Ω", "CGL", "w", "2126", "Ohm sign (capital Greek omega)"),
        ("∆", "CGL", "d", "2206", "Increment (capital Greek delta)"),
        ("∏", "CGL", "p", "220F", "Product (capital Greek pi)"),
        ("∑", "CGL", "s", "2211", "Sum (capital Greek sigma)"),
    )
    for char, prefix, ascii_letter, code, description in entries:
        write_yaml_line(out_stream, char, prefix + ascii_to_unicode(ascii_letter),
                        code, description)
    out_stream.write("\n")
def write_vulgar_fractions(out_stream):
    """Write the entries for the precomposed vulgar-fraction characters.

    Every cell string has the form ``#`` numerator ``N⠌`` denominator, with
    each digit cell preceded by ``N``.  A blank line follows the entries.

    Args:
        out_stream: Writable text stream that receives the YAML lines.
    """
    fractions = (
        ("¼", "#N⠁N⠌N⠙", "00BC", "Vulgar Fraction One Quarter"),
        ("½", "#N⠁N⠌N⠃", "00BD", "Vulgar Fraction One Half"),
        ("¾", "#N⠉N⠌N⠙", "00BE", "Vulgar Fraction Three Quarters"),
        ("⅐", "#N⠁N⠌N⠛", "2150", "Vulgar Fraction One Seventh"),
        ("⅑", "#N⠁N⠌N⠊", "2151", "Vulgar Fraction One Ninth"),
        ("⅒", "#N⠁N⠌N⠁N⠚", "2152", "Vulgar Fraction One Tenth"),
        ("⅓", "#N⠁N⠌N⠉", "2153", "Vulgar Fraction One Third"),
        ("⅔", "#N⠃N⠌N⠉", "2154", "Vulgar Fraction Two Thirds"),
        ("⅕", "#N⠁N⠌N⠑", "2155", "Vulgar Fraction One Fifth"),
        ("⅖", "#N⠃N⠌N⠑", "2156", "Vulgar Fraction Two Fifths"),
        ("⅗", "#N⠉N⠌N⠑", "2157", "Vulgar Fraction Three Fifths"),
        ("⅘", "#N⠙N⠌N⠑", "2158", "Vulgar Fraction Four Fifths"),
        ("⅙", "#N⠁N⠌N⠋", "2159", "Vulgar Fraction One Sixth"),
        ("⅚", "#N⠑N⠌N⠋", "215A", "Vulgar Fraction Five Sixths"),
        ("⅛", "#N⠁N⠌N⠓", "215B", "Vulgar Fraction One Eighth"),
        ("⅜", "#N⠉N⠌N⠓", "215C", "Vulgar Fraction Three Eighths"),
        ("⅝", "#N⠑N⠌N⠓", "215D", "Vulgar Fraction Five Eighths"),
        ("⅞", "#N⠛N⠌N⠓", "215E", "Vulgar Fraction Seven Eighths"),
        # Zero thirds: the denominator is 3 (⠉), not 5 (⠑).
        ("↉", "#N⠚N⠌N⠉", "2189", "Vulgar Fraction Zero Thirds"),
    )
    for char, cells, code, description in fractions:
        write_yaml_line(out_stream, char, cells, code, description)
    out_stream.write("\n")
def write_special_lines(out_stream):
    """Write the entries whose braille depends on the enclosing MathML element.

    Each entry becomes a ``test`` with an ``if``/``then``/``else`` nested
    under the character's key: the ``then`` text is used when the XPath
    ``self::m:<element>`` holds and the ``else`` text otherwise.

    Args:
        out_stream: Writable text stream that receives the YAML lines.
    """
    entries = [
        # [character, "then" text, "else" text, element name, description]
        [" ", "N⠐", "W", "mn", "Space"],
        # Written as an escape so it cannot be mistaken for (or silently
        # turned into) an ordinary space, which would duplicate the key above.
        ["\u00a0", "N⠐", "W", "mn", "Non-breaking Space"],
        [",", "N⠂", ",", "mn", "Comma"],
        [".", "N⠲", ".", "mn", "Period"],
        ["-", "⠐⠤", "⠤", "mo", "Minus sign or hyphen"],
    ]
    for char, then_text, else_text, element, name in entries:
        description = "" if name == "" else '(' + name + ')'
        code = format(ord(char), "x")
        out_stream.write(' - "{}": # 0x{} {}\n'.format(char, code, description))
        # The test and its branches are indented so that they are children
        # of the key written above rather than siblings of it.
        out_stream.write('    - test:\n')
        out_stream.write('        if: "self::m:{}"\n'.format(element))
        out_stream.write('        then: [t: "{}"]\n'.format(then_text))
        out_stream.write('        else: [t: "{}"]\n'.format(else_text))
# ASCII-braille spellings that convert_shortforms() converts and prints.
shortforms = [
    # a
    "ab", "abv", "ac", "acr", "af", "afn", "afw", "ag", "ag/",
    "al", "alm", "alr", "alt", "al?", "alw",
    # b - d
    "bl", "brl", "cd", "dcl", "dclg", "dcv", "dcvg",
    # e - i
    "ei", "fr", "f/", "gd", "grt", "hm", "hmf", "h]f", "imm",
    # l - p
    "ll", "lr", "myf", "m*", "m/", "nec", "nei",
    "pd", "p]cv", "p]cvg", "p]h",
    # q - t
    "qk", "rcv", "rcvg", "rjc", "rjcg", "sd", "s*",
    "td", "tgr", "tm", "tn",
    # x, y and spellings that start with a contraction sign
    "xf", "xs", "yr", "yrf", "yrvs",
    "!mvs", "*n", "%d", "?yf", "\\rvs", "wd",
    # spellings that start with 2, 3 or a double quote
    "2c", "2f", "2h", "2l", "2n", "2s", "2t", "2y",
    "3cv", "3cvg", '"of',
]
def str_to_unicode(str):
    """Convert every character of *str* with ascii_to_unicode() and join the results."""
    return "".join(ascii_to_unicode(ch) for ch in str)
def add_letter(str: str):
    """Return *str* with an ``L`` inserted in front of every character."""
    return "".join('L' + ch for ch in str)
def convert_shortforms():
    """Print the shortforms as braille with ``L`` in front of every cell.

    All spellings are converted with str_to_unicode() first; add_letter()
    is then applied to each result and the resulting list is printed.
    """
    braille = [str_to_unicode(spelling) for spelling in shortforms]
    print([add_letter(cells) for cells in braille])
import re
# Splits one input line into the quoted single character, the second quoted
# string (which must be followed directly by "]") and whatever follows.
INPUT_LINE = re.compile(
    '[^"]*"(?P<char>.)"'            # anything up to the first quoted, one-character key
    '[^"]*"(?P<nemeth>[^"]*)"]'     # the next quoted string and its closing bracket
    '(?P<rest>.*)'                  # the remainder of the line (without the newline)
)
import yaml
def nemeth_shape_to_ueb_shape(in_file, out_file):
    """Convert every line of *in_file* with convert_line_to_ueb() into *out_file*.

    Args:
        in_file: Path of the input file, read as UTF-8 with an optional BOM.
        out_file: Path of the file to create or overwrite (written as UTF-8).
    """
    # The output file is opened first, so it is created/truncated even when
    # the input file cannot be opened.
    with open(out_file, 'w', encoding="utf8") as out_stream, \
            open(in_file, encoding="utf-8-sig") as in_stream:
        out_stream.writelines(convert_line_to_ueb(line) for line in in_stream)
def convert_line_to_ueb(line: str):
    """Rewrite one entry line with its braille converted by covert_nemeth_to_ueb().

    Args:
        line: One line of the input file.

    Returns:
        ``' - "<char>": "<converted>"<rest>\\n'`` for a line that INPUT_LINE
        recognises.  Any other line (a ``---`` header, a comment, a blank
        line, ...) is returned unchanged so that it is copied through
        instead of aborting the whole conversion.
    """
    match = INPUT_LINE.match(line)
    if match is None:
        return line
    fields = match.groupdict()
    return ' - "{}": "{}"{}\n'.format(
        fields["char"], covert_nemeth_to_ueb(fields["nemeth"]), fields["rest"])
# Pattern that covert_nemeth_to_ueb() tries first.  It is applied with
# match(), so it only has to match at the start of the string.
# NOTE(review): the modifier-plus-char group is not optional, so a string
# made of just "⠫" and a shape cell does not match -- confirm that is intended.
SHAPE_RE = re.compile(
    ("("
     "⠫"                                        # every match starts with this cell
     "(?P<filled>[⠸⠨]?)"                        # optional cell, handed to shape_start()
     "(?P<shape>[⠁⠄⠙⠑⠊⠇⠕⠏⠛⠟⠗⠓⠎⠵⠞⠉⠪⠲])"     # one shape cell, handed to shape_convert()
     "(((?P<imod>(⠸⠫))|(?P<smod>⠨))(?P<char>[^⠻]+))"   # imod or smod (never both), then one or more cells other than ⠻
     "(?P<end>⠻?)"                              # optional closing ⠻; not read by covert_nemeth_to_ueb()
     ")")
)
# Pattern that covert_nemeth_to_ueb() tries when SHAPE_RE does not match.
# Every named group is optional, so any string that starts with "⠫" matches,
# and a group that matched nothing yields "" rather than None.
# The group names suggest direction, boldness, left/right half-heads, head
# kinds and the shaft -- presumably the parts of a Nemeth arrow; TODO confirm.
ARROW_RE = re.compile(
    ("("
     "⠫"                                        # every match starts with this cell
     "(?P<dir>[⠣⠩⠘⠰]?)"
     "(?P<bold>⠸?)"                             # the only group covert_nemeth_to_ueb() reads that exists here
     "(?P<lHalf>[⠠⠈]?)"
     "(?P<lKind>[⠿⠳⠪⠯]?[⠿⠳⠪⠯]?)"               # zero, one or two cells
     "(?P<shaft>(⠢⠔|⠒⠀⠒|⠂⠂⠂|⠶⠶⠶|⠶⠶|⠶|⠒⠒⠒|⠒⠒|⠒|⠔⠒⠢)?)"   # longer alternatives come before their prefixes
     "(?P<rHalf>[⠠⠈]?)"
     "(?P<rKind>[⠿⠳⠕⠽]?[⠿⠳⠕⠽]?)"               # zero, one or two cells
     ")")
)
def covert_nemeth_to_ueb(nemeth: str) -> str:
    """Translate a Nemeth shape or arrow string into its UEB counterpart.

    SHAPE_RE is tried first and ARROW_RE second, both anchored at the start
    of *nemeth*.  The literal ``"NONE!!!"`` is returned when neither matches.
    """
    parts = SHAPE_RE.match(nemeth)
    if parts:
        # imod and smod are alternatives in SHAPE_RE, so one of them is None
        # and the corresponding helper contributes an empty string.
        return shape_start(parts["filled"]) + \
            shape_convert(parts["shape"]) + \
            interior_char(parts["imod"], parts["char"]) + \
            structural_char(parts["smod"], parts["char"]) + \
            "T"
    parts = ARROW_RE.match(nemeth)
    if parts:
        # NOTE(review): ARROW_RE defines no group named "arrow", "imod",
        # "smod" or "char" (looking one up raises IndexError), and
        # arrow_convert is not defined in the visible part of this file, so
        # this branch looks unfinished -- confirm before relying on it.
        return arrow_start(parts["bold"]) + \
            arrow_convert(parts["arrow"]) + \
            interior_char(parts["imod"], parts["char"]) + \
            structural_char(parts["smod"], parts["char"]) + \
            "T"
    return "NONE!!!"
# Opening cells for the two "filled" cells that SHAPE_RE can capture.
SHAPE_START = {
    "⠸": "⠸⠫",
    "⠨": "⠨⠫",
}


def shape_start(nemeth_filled):
    """Return ``"S"`` plus the opening cells for a shape.

    Args:
        nemeth_filled: The cell captured in front of the shape; anything
            that is not a key of SHAPE_START selects the default ``"1⠫"``.
    """
    try:
        opening = SHAPE_START[nemeth_filled]
    except KeyError:
        opening = "1⠫"
    return "S" + opening
# Replacement cells for a Nemeth shape cell.
# NOTE(review): "⠢", "⠖" and "⠦" are not in the shape character class of
# SHAPE_RE, so those three entries cannot be reached through it -- confirm.
SHAPE = {
    "⠞": "⠼⠉",
    "⠲": "⠼⠙",
    "⠢": "⠼⠑",       # no trailing space: it would end up inside the output
    "⠖": "⠼⠋",
    "⠦": "⠼⠓",
    "⠉": "⠿",
    "⠛": "⠈⠼⠙",
}


def shape_convert(nemeth_shape):
    """Return the replacement cells for a Nemeth shape cell.

    Args:
        nemeth_shape: The cell captured by the ``shape`` group.

    Returns:
        The SHAPE entry, or the literal ``"UNKNOWN SHAPE!"`` when the cell
        has no entry.
    """
    return SHAPE.get(nemeth_shape, "UNKNOWN SHAPE!")
def interior_char(indicator, ch):
    """Return ``"1⠪"`` plus the translated *ch*, or ``""`` without an indicator.

    Args:
        indicator: The captured modifier, or None when it did not match.
        ch: The text handed to translate().
    """
    if indicator is not None:
        return "1⠪" + translate(ch)
    return ""
def structural_char(indicator, ch):
    """Return ``"structural_char"`` plus the translated *ch*, or ``""`` without an indicator.

    Args:
        indicator: The captured modifier, or None when it did not match.
        ch: The text handed to translate().
    """
    if indicator is not None:
        return "structural_char" + translate(ch)
    return ""
def arrow_start(bold):
    """Return the opening cells for an arrow.

    Args:
        bold: The text captured by the ``bold`` group of ARROW_RE.  That
            group is optional *inside* the pattern, so it is ``""`` -- not
            None -- for an arrow that is not bold; both are treated as
            "not bold".

    Returns:
        ``"1⠘⠳"`` for a bold arrow, ``"1⠳"`` otherwise.
    """
    return "1⠘⠳" if bold else "1⠳"
def translate(ch):
    """Return *ch* wrapped in ``**`` markers on both sides."""
    marker = "**"
    return "".join((marker, ch, marker))
# Entry point: this statement is not guarded, so it runs whenever the module
# is executed or imported.  It reads "ueb-from-csv.yaml" and writes
# "unicode.yaml", both relative to the current working directory.
write_ueb_yaml("ueb-from-csv.yaml", "unicode.yaml")