import xml
import codecs
from xml.etree.ElementTree import parse
def convert_xml_entities(in_file, out_obj):
    """Write one Rust map entry per character that has operator-dictionary data.

    The XML document in *in_file* is parsed, and every child of the root's
    ``charlist`` element is scanned for ``operator-dictionary`` children.
    A character with at least one such child produces a line of the form::

        "<char>" => OperatorInfo{ op_type: OperatorTypes::<FORM>, priority: <n>, next: &None },

    Additional ``operator-dictionary`` children of the same character are
    chained through nested ``next: &Some( OperatorInfo{ ... } )`` records.
    Characters without any ``operator-dictionary`` child produce no output.

    Args:
        in_file: Source accepted by ``xml.etree.ElementTree.parse``.
        out_obj: Object with a ``write(str)`` method that receives the text.
    """
    from xml.etree import ElementTree

    char_list = ElementTree.parse(in_file).getroot().find("charlist")
    for char_elem in char_list:
        entries = 0
        for op_elem in char_elem.findall("operator-dictionary"):
            if entries:
                # Second and later forms hang off the previous record.
                out_obj.write("\n\t\t\tnext: &Some( ")
            else:
                # First form: emit the map key for this character.
                key = convert_to_char(char_elem.get("id"))
                out_obj.write("\t\"{}\" => ".format(key))
            op_type = compute_form(op_elem.get("form"), op_elem.get("fence"))
            priority = op_elem.get("priority")
            out_obj.write(
                "OperatorInfo{} op_type: OperatorTypes::{}, priority: {}, ".format(
                    "{", op_type, priority
                )
            )
            entries += 1

        if entries == 0:
            continue

        # Terminate the chain, then close every nested `&Some( OperatorInfo{`.
        out_obj.write("next: &None ")
        for _ in range(entries - 1):
            out_obj.write("} )")
        out_obj.write("},\n")
def convert_to_char(str: str):
    """Convert a ``U``-prefixed hex code point id into the characters it names.

    The id is expected to look like ``"U0002B"`` (one code point) or
    ``"U0226A-020D2"`` (several code points separated by ``-``).  Each hex
    group is turned into its character; a double quote or a backslash is
    preceded by a backslash so the result can be placed inside a
    double-quoted string literal.

    Args:
        str: The id string. The parameter name shadows the builtin but is
            kept because it is part of the function's signature.

    Returns:
        The decoded (and, where needed, escaped) characters as one string.

    Raises:
        IndexError: If the id contains no ``"U"``.
        ValueError: If a group is not valid hexadecimal.
    """
    # Everything after the first "U" is the dash-separated hex code point list.
    hex_groups = str.split("U")[1]
    pieces = []
    for char_str in hex_groups.split("-"):
        ch = chr(int(char_str, base=16))
        if ch in ('"', "\\"):
            # Escape characters that would break a double-quoted literal.
            pieces.append("\\")
        pieces.append(ch)
    return "".join(pieces)
def compute_form(form: str, fence: str):
    """Return the operator-type name for a form / fence attribute pair.

    Args:
        form: The operator form, e.g. ``"prefix"``, ``"infix"`` or
            ``"postfix"``.
        fence: The fence attribute value; ``None`` (attribute absent) and
            ``"false"`` both mean the operator is not a fence.

    Returns:
        ``"LEFT_FENCE"`` for a prefix fence, ``"RIGHT_FENCE"`` for a postfix
        fence, and otherwise *form* upper-cased.
    """
    if fence is None or fence == "false":
        return form.upper()
    if form == "prefix":
        return "LEFT_FENCE"
    if form == "postfix":
        return "RIGHT_FENCE"
    # A fence that is neither prefix nor postfix keeps its plain form name.
    return form.upper()
def add_wrappers(in_file: str, out_file: str):
    """Generate *out_file*: a ``phf_map! { ... }`` wrapper around the entries.

    The output starts with the ``phf_map! {`` opener, continues with whatever
    ``convert_xml_entities`` writes for *in_file*, and ends with a manually
    added ``"mod"`` entry followed by the closing brace.

    Args:
        in_file: XML source handed to ``convert_xml_entities``.
        out_file: Path of the file to create or overwrite (written as UTF-8).
    """
    opening = "phf_map! {\n"
    trailer = (
        "\n\t// the following are not in the operator dictionary and are added \"manually\"\n",
        "\t\"mod\" => OperatorInfo{ op_type: OperatorTypes::INFIX, priority: 800, next: &None },\n",
        "}\n",
    )
    with open(out_file, mode="w", encoding="utf8") as rust_out:
        rust_out.write(opening)
        convert_xml_entities(in_file, rust_out)
        for chunk in trailer:
            rust_out.write(chunk)
# Script entry: read the local unicode.xml checkout and write out.rs into the
# current working directory.
add_wrappers(
    in_file="c:\\dev\\mathml-refresh\\xml-entities\\unicode.xml",
    out_file="out.rs",
)