from bs4 import BeautifulSoup
def create_unicode_from_html(in_file: str, out_file):
with open(in_file, encoding='utf8') as in_stream:
with open(out_file, 'a', encoding='utf8') as out_stream:
file_contents = BeautifulSoup(in_stream, features="html.parser")
for row in file_contents.find_all('tr'):
cols = row.find_all('td')
if len(cols) > 0:
generate_char(out_stream, cols[1].get_text(), cols[2].get_text(), cols[3].get_text())
def generate_char(out_stream, hex: str, description: str, braille: str):
first_part = ' - "{}": [t: "{}"]'.format(chr(int(hex, base=16)), braille)
out_stream.write('{:32}# 0x{} ({})\n'.format(
first_part, hex, description))
def generate_digits(out_file, prefix: str, hex: str, description: str):
with open(out_file, 'a', encoding='utf8') as out_stream:
i = 0
for digit in "⠴⠂⠆⠒⠲⠢⠖⠶⠦⠔":
as_int = int(hex,16) +i
generate_char(out_stream, "{0:x}".format(as_int), "{} {}".format('bold', i), prefix+'⠼'+digit)
i += 1
import json
def create_unicode_from_json(in_file: str, out_file):
with open(in_file, encoding='utf8') as in_stream:
with open(out_file, 'a', encoding='utf8') as out_stream:
file_contents = json.load(in_stream)
info = file_contents["information"].replace("tests", "chars")
out_stream.write('\n # --- {} ---\n'.format(info))
for char, braille in file_contents['tests'].items():
if braille["expected"] != "":
generate_char_line(out_stream, char, braille["expected"], "default_alphabet" in in_file)
import re
MATCH_FACE_LANG_CHAR = re.compile(
("("
"(?P<prefix>^)"
"(?P<sans>⠠⠨)?"
"(?P<bold>⠸(?=[^⠠⠔⠁⠃⠉⠙⠑⠋⠛⠓⠊⠚⠅⠇⠍⠝⠕⠏⠟⠗⠎⠞⠥⠧⠺⠭⠽⠵⠯⠫⠱⠹]))?"
"(?P<script>⠈(?=[^⠠⠔⠁⠃⠉⠙⠑⠋⠛⠓⠊⠚⠅⠇⠍⠝⠕⠏⠟⠗⠎⠞⠥⠧⠺⠭⠽⠵⠯⠫⠱⠹]))?"
"(?P<italic>⠨(?=[^⠠⠔⠁⠃⠉⠙⠑⠋⠛⠓⠊⠚⠅⠇⠍⠝⠕⠏⠟⠗⠎⠞⠥⠧⠺⠭⠽⠵⠯⠫⠱⠹]))?"
"((((?P<en>⠰)|(?P<de>⠸)|(?P<el>⠨)|(?P<el_var>⠨⠈))?"
"(?P<cap>⠠)?(?P<char>[⠁⠃⠉⠙⠑⠋⠛⠓⠊⠚⠅⠇⠍⠝⠕⠏⠟⠗⠎⠞⠥⠧⠺⠭⠽⠵⠯⠫⠱⠹]))|"
"((?P<num>⠼?)(?P<digit>[⠴⠂⠆⠒⠲⠢⠖⠶⠦⠔])))"
"(?P<postfix>$)"
")")
)
SRE_CHAR_FIXES = {
"⪼": "⠨⠨⠂⠈⠨⠨⠂⠻",
"≻": "⠨⠨⠂",
"≿": "⠨⠨⠂⠈⠱",
"⪺": "⠨⠨⠂⠌⠈⠱⠈⠱",
"⪸": "⠨⠨⠂⠈⠱⠈⠱",
"⪶": "⠨⠨⠂⠌⠨⠅",
"⪴": "⠨⠨⠂⠨⠅",
"⪲": "⠨⠨⠂⠌⠱",
"⪰": "⠨⠨⠂⠱",
}
def generate_char_line(out_stream, char: str, braille: str, is_letter: bool):
if braille == "⠀":
braille = "W"
braille = SRE_CHAR_FIXES.get(char, braille)
if (char == '"' or char == '\\'):
char = "\\" + char
result = ""
matched = MATCH_FACE_LANG_CHAR.match(braille)
if is_letter and matched:
dict = matched.groupdict()
if dict["prefix"]:
result += dict["prefix"]
if dict["sans"]:
result += "S"
if dict["bold"]:
result += "B"
if dict["script"]:
result += "T"
if dict["italic"]:
result += "I"
if dict["en"]:
result += "E"
if dict["de"]:
result += "D"
if dict["el"]:
result += "G"
if dict["el_var"]:
result += "V"
if dict["char"]:
if dict["cap"]:
result += "C"
result += "L" + dict["char"]
if dict["digit"]:
result += "N" + dict["digit"]
if dict["postfix"]:
result += dict["postfix"]
else:
result = braille
first_part = ' - "{}": [t: "{}"]'.format(char, result)
out_stream.write('{:32}# {}\n'.format(first_part, hex(ord(char[-1])) ))
import os
if os.path.exists("out"):
os.remove("out")
path = "C:/Dev/speech-rule-engine/sre-tests/expected/nemeth/symbols/"
if os.path.exists("alphanumerics.txt"):
os.remove("alphanumerics.txt")
for filename in os.listdir(path):
if not(filename == 'default_functions.json' or filename == 'default_si_units.json' or filename == 'default_units.json'):
create_unicode_from_json(path+filename, "alphanumerics.txt")