import pandas as pd
import sys
def read_data_from(created_date):
    """Load the ISO 639 source tables from the current working directory.

    Three files are read:

    * ``iso-639-1-names.csv`` (comma separated) with the columns ``ID``,
      ``English_Name`` and ``Indigenous_Name``;
    * ``iso-639-3-macrolanguages_<created_date>.tab`` (tab separated) with
      the columns ``M_Id``, ``I_Id`` and ``I_Status``;
    * ``iso-639-3_<created_date>.tab`` (tab separated) with the columns
      ``Id``, ``Ref_Name``, ``Part2B``, ``Part2T``, ``Part1``, ``Scope`` and
      ``Language_Type``.

    Args:
        created_date: Suffix embedded in the two ``.tab`` file names,
            e.g. ``'20190408'``.

    Returns:
        A ``(languages, tl_names)`` tuple. ``languages`` is a list with one
        dict per row of the main table (keys ``id``, ``name``, ``b_id``,
        ``t_id``, ``sid``, ``scope``, ``l_type`` and ``children``).
        ``tl_names`` maps each ``ID`` of the names table to a dict with the
        keys ``english`` and ``indigenous``. Blank cells are reported as
        float NaN, which is what the ``isinstance(..., str)`` checks in the
        writer rely on.
    """
    # pandas' default NA markers include literal text such as "nan", "NA" and
    # "null". "nan" is also the ISO 639-3 identifier of Min Nan Chinese, so
    # with the defaults that code is silently turned into a float NaN. Only
    # genuinely blank cells are treated as missing here.
    read_options = {'header': 0, 'keep_default_na': False, 'na_values': ['']}

    tl_frame = pd.read_csv('iso-639-1-names.csv', sep=',', **read_options)
    tl_names = {
        row.ID: {
            'english': row.English_Name,
            'indigenous': row.Indigenous_Name,
        }
        for row in tl_frame.itertuples()
    }

    macros = pd.read_csv(
        'iso-639-3-macrolanguages_%s.tab' % created_date,
        sep='\t',
        **read_options
    )
    macro_languages = {}
    for row in macros.itertuples():
        # Only rows with status 'A' contribute members
        # (presumably "active" -- confirm against the table documentation).
        if row.I_Status != 'A':
            continue
        members = macro_languages.setdefault(row.M_Id, [])
        # A blank member id is read as NaN; skip it instead of storing it.
        if isinstance(row.I_Id, str):
            members.append(row.I_Id)

    main = pd.read_csv(
        'iso-639-3_%s.tab' % created_date,
        sep='\t',
        **read_options
    )
    languages = []
    for row in main.itertuples():
        children = []
        if row.Scope == 'M':
            # A macrolanguage without any active member rows simply has no
            # children; a plain index lookup would raise KeyError here.
            children = macro_languages.get(row.Id, [])
        languages.append({
            'id': row.Id,
            'name': row.Ref_Name,
            'b_id': row.Part2B,
            't_id': row.Part2T,
            'sid': row.Part1,
            'scope': row.Scope,
            'l_type': row.Language_Type,
            'children': children,
        })
    return (languages, tl_names)
# Output labels for the single-letter ``Scope`` codes of a language record.
scope_values = dict(
    I='Individual',
    M='MacroLanguage',
    S='Special',
)
# Output labels for the single-letter ``Language_Type`` codes of a language
# record.
type_values = dict(
    A='Ancient',
    C='Constructed',
    E='Extinct',
    H='Historical',
    L='Living',
    S='Special',
)
def _language_entry(linfo, tl_names):
    """Render one language record as a ``"<id>":{...}`` JSON object member."""
    fields = [
        '"code":"%s"' % linfo['id'],
        '"reference_name":"%s"' % clean(linfo['name']),
        '"indigenous_name":%s' % indigenous_name(linfo['sid'], tl_names),
        '"other_names":%s' % other_names(linfo['sid'], tl_names),
        '"bibliographic_code":%s' % optional_string(linfo['b_id']),
        '"terminology_code":%s' % optional_string(linfo['t_id']),
        '"short_code":%s' % optional_string(linfo['sid']),
        '"scope":"%s"' % scope_values[linfo['scope']],
        '"l_type":"%s"' % type_values[linfo['l_type']],
        '"family_members":%s' % optional_vector(linfo['children']),
    ]
    return '"%s":{%s}' % (linfo['id'], ','.join(fields))


def write_data_out(languages, tl_names, out_path):
    """Write all language records to ``<out_path>/languages.json``.

    The file holds a single JSON object keyed by language id, followed by a
    newline. Each value carries the fields ``code``, ``reference_name``,
    ``indigenous_name``, ``other_names``, ``bibliographic_code``,
    ``terminology_code``, ``short_code``, ``scope``, ``l_type`` and
    ``family_members``.

    Args:
        languages: Iterable of record dicts with the keys ``id``, ``name``,
            ``b_id``, ``t_id``, ``sid``, ``scope``, ``l_type`` and
            ``children``.
        tl_names: Mapping from a short code to a dict with the keys
            ``english`` and ``indigenous``.
        out_path: Existing directory that receives ``languages.json``.

    Raises:
        KeyError: If a record's ``scope`` or ``l_type`` is not a key of
            ``scope_values`` / ``type_values``.
    """
    # Render everything before touching the file so that a bad record does
    # not leave a truncated languages.json behind.
    document = '{%s}' % ','.join(
        _language_entry(linfo, tl_names) for linfo in languages
    )

    target = '%s/languages.json' % out_path
    print('writing %s' % target)
    # Names can contain non-ASCII text; the platform default encoding is not
    # guaranteed to be able to represent it, so the encoding is pinned to
    # UTF-8 (the encoding JSON consumers expect).
    with open(target, 'w', encoding='utf-8') as text_file:
        text_file.write(document + '\n')
def clean(s):
    """Return *s* stripped and escaped for use inside a JSON string literal.

    Args:
        s: Text to embed between double quotes in the generated JSON.

    Returns:
        The text without leading/trailing whitespace and without the
        surrounding quotes, with double quotes, backslashes and control
        characters escaped. Non-ASCII characters are kept as they are.
    """
    import json

    # json.dumps performs the complete JSON string escaping; escaping only
    # '"' would let a backslash or a control character produce invalid JSON.
    # The slice removes the surrounding quotes, which callers add themselves.
    return json.dumps(s.strip(), ensure_ascii=False)[1:-1]
def other_names(key, map):
    """Return the alternate English names for *key* as a JSON array or ``null``.

    The ``english`` entry is a ``;``-separated list; every name after the
    first one counts as an alternate name.

    Args:
        key: Short language code, or a non-string (e.g. NaN) when the
            language has none.
        map: Mapping from short code to a dict holding an ``english`` entry.
            The parameter name shadows the builtin and is kept only for
            interface compatibility.

    Returns:
        A JSON array literal of the alternate names, or ``'null'`` when the
        key is unknown or no alternate name exists.
    """
    if not isinstance(key, str) or key not in map:
        return 'null'
    english = map[key]['english']
    # A blank cell in the source table is not a string; there is nothing to
    # split in that case.
    if not isinstance(english, str):
        return 'null'
    # Skip blank segments (e.g. from a trailing ';') so that no empty names
    # are emitted.
    alternates = [name for name in english.split(';')[1:] if name.strip()]
    if not alternates:
        return 'null'
    return optional_vector(alternates)
def indigenous_name(key, map):
    """Return the indigenous name for *key* as a JSON string or ``null``.

    Args:
        key: Short language code, or a non-string (e.g. NaN) when the
            language has none.
        map: Mapping from short code to a dict holding an ``indigenous``
            entry. The parameter name shadows the builtin and is kept only
            for interface compatibility.

    Returns:
        The quoted, escaped name, or ``'null'`` when the key is unknown or
        the name is missing.
    """
    if not isinstance(key, str) or key not in map:
        return 'null'
    name = map[key]['indigenous']
    # A blank cell in the source table is not a string; passing it on to
    # clean() would raise AttributeError.
    if not isinstance(name, str):
        return 'null'
    return '"%s"' % clean(name)
def optional_string(s):
    """Return *s* wrapped in double quotes, or ``'null'`` if it is not a str."""
    if not isinstance(s, str):
        return 'null'
    return '"%s"' % s
def optional_vector(v):
    """Return *v* as a JSON array of quoted strings, or ``'null'`` if empty.

    Every element is passed through ``clean`` before it is quoted.
    """
    if len(v) != 0:
        quoted = ('"%s"' % clean(item) for item in v)
        return '[%s]' % ','.join(quoted)
    return 'null'
# Script entry point: the first command-line argument is the directory that
# receives languages.json.
if len(sys.argv) < 2:
    # Report the usage error on stderr and exit with a failing status so that
    # calling scripts can detect it.
    print('Error: need a path argument', file=sys.stderr)
    sys.exit(1)
else:
    write_data_out(*read_data_from('20190408'), sys.argv[1])