locale-codes 0.3.0

Locale-related codes and identifiers, together with any standards-based information concerning them.
Documentation
import pandas as pd
import sys

def read_data_from(created_date):
    """Load the language tables from the current working directory.

    Three files are read: ``iso-639-1-names.csv`` (comma-separated) plus the
    tab-separated ``iso-639-3-macrolanguages_<created_date>.tab`` and
    ``iso-639-3_<created_date>.tab``.

    Args:
        created_date: Date suffix embedded in the two ``.tab`` file names.

    Returns:
        A ``(languages, tl_names)`` tuple. ``languages`` holds one dict per
        row of the main table with the keys ``id``, ``name``, ``b_id``,
        ``t_id``, ``sid``, ``scope``, ``l_type`` and ``children``.
        ``tl_names`` maps each ``ID`` of the names file to a dict with its
        ``english`` and ``indigenous`` names. Empty cells are returned as
        float NaN rather than as strings.
    """
    # pandas' default NA markers include the literal strings "nan" and
    # "null". "nan" is itself a valid ISO 639-3 identifier, so with the
    # defaults that code is parsed as a missing value and silently dropped
    # from its macrolanguage's member list. Only empty cells count as missing.
    read_opts = {'header': 0, 'keep_default_na': False, 'na_values': ['']}

    name_table = pd.read_csv('iso-639-1-names.csv', sep=',', **read_opts)
    tl_names = {
        row.ID: {
            'english': row.English_Name,
            'indigenous': row.Indigenous_Name,
        }
        for row in name_table.itertuples()
    }

    macro_table = pd.read_csv(
        'iso-639-3-macrolanguages_%s.tab' % created_date, sep='\t', **read_opts)
    macro_languages = {}
    for row in macro_table.itertuples():
        # Only rows whose status is 'A' contribute members.
        if row.I_Status != 'A':
            continue
        members = macro_languages.setdefault(row.M_Id, [])
        # An empty I_Id cell is NaN (a float); skip it.
        if isinstance(row.I_Id, str):
            members.append(row.I_Id)

    main_table = pd.read_csv(
        'iso-639-3_%s.tab' % created_date, sep='\t', **read_opts)
    languages = []
    for row in main_table.itertuples():
        # A macrolanguage without any active mapping row simply has no
        # members; a plain subscript would raise KeyError here.
        if row.Scope == 'M':
            children = macro_languages.get(row.Id, [])
        else:
            children = []
        languages.append({
            'id': row.Id,
            'name': row.Ref_Name,
            'b_id': row.Part2B,
            't_id': row.Part2T,
            'sid': row.Part1,
            'scope': row.Scope,
            'l_type': row.Language_Type,
            'children': children,
        })
    return (languages, tl_names)

# Output labels for the single-letter codes read from the ``Scope`` column.
scope_values = dict(
    I='Individual',
    M='MacroLanguage',
    S='Special',
)

# Output labels for the single-letter codes read from the ``Language_Type``
# column.
type_values = dict(
    A='Ancient',
    C='Constructed',
    E='Extinct',
    H='Historical',
    L='Living',
    S='Special',
)

def write_data_out(languages, tl_names, out_path):
    """Serialize the language records to ``<out_path>/languages.json``.

    The file contains a single JSON object keyed by each record's ``id``.
    Every value is an object with the members ``code``, ``reference_name``,
    ``indigenous_name``, ``other_names``, ``bibliographic_code``,
    ``terminology_code``, ``short_code``, ``scope``, ``l_type`` and
    ``family_members``.

    Args:
        languages: Iterable of record dicts as produced by ``read_data_from``.
        tl_names: Mapping from short code to its ``english`` and
            ``indigenous`` names.
        out_path: Existing directory that receives ``languages.json``.

    Raises:
        KeyError: If a record's ``scope`` or ``l_type`` code has no entry in
            ``scope_values`` / ``type_values``.
    """
    def entry(linfo):
        # One '"<id>":{...}' member of the top-level JSON object.
        fields = [
            '"code":"%s"' % linfo['id'],
            '"reference_name":"%s"' % clean(linfo['name']),
            '"indigenous_name":%s' % indigenous_name(linfo['sid'], tl_names),
            '"other_names":%s' % other_names(linfo['sid'], tl_names),
            '"bibliographic_code":%s' % optional_string(linfo['b_id']),
            '"terminology_code":%s' % optional_string(linfo['t_id']),
            '"short_code":%s' % optional_string(linfo['sid']),
            '"scope":"%s"' % scope_values[linfo['scope']],
            '"l_type":"%s"' % type_values[linfo['l_type']],
            '"family_members":%s' % optional_vector(linfo['children']),
        ]
        return '"%s":{%s}' % (linfo['id'], ','.join(fields))

    target = '%s/languages.json' % out_path
    print('writing %s' % target)
    # Build the whole document first so a bad record cannot leave a
    # truncated file behind.
    document = '{%s}' % ','.join(entry(linfo) for linfo in languages)
    # JSON is exchanged as UTF-8. Without an explicit encoding the platform
    # default is used, which can fail on (or mis-encode) non-ASCII names.
    with open(target, 'w', encoding='utf-8') as text_file:
        print(document, file=text_file)

def clean(s):
    """Return *s* stripped and escaped for use inside a JSON string literal.

    Leading and trailing whitespace is removed. Double quotes, backslashes
    and control characters are then escaped, so the result can be placed
    between a pair of ``"`` characters. Non-ASCII characters are kept as-is.

    Args:
        s: The text to embed.

    Returns:
        The escaped text, without surrounding quotes.
    """
    # Function-scope import so this helper does not depend on the file's
    # import block.
    import json

    # json.dumps applies the full JSON escaping rules; escaping only '"' let
    # backslashes and control characters through and produced invalid JSON.
    # The slice drops the quotes json.dumps adds.
    return json.dumps(s.strip(), ensure_ascii=False)[1:-1]

def other_names(key, map):
    """Return the alternative English names for *key* as a JSON value.

    The ``english`` entry is split on ``;``. The first name is skipped and
    the remaining ones are rendered as a JSON array.

    Args:
        key: Short language code; anything that is not a ``str`` (such as
            the NaN of an empty cell) counts as "no code".
        map: Mapping from short code to a dict with an ``english`` entry.
            The parameter name shadows the builtin but is kept so existing
            keyword callers keep working.

    Returns:
        A JSON array literal, or ``'null'`` when there is nothing to list.
    """
    if not isinstance(key, str) or key not in map:
        return 'null'
    english = map[key]['english']
    # An empty cell is NaN (a float) and has no .split().
    if not isinstance(english, str):
        return 'null'
    # Skip the first name, and drop blanks left by a trailing ';' so no
    # empty alias is emitted.
    alternates = [name for name in english.split(';')[1:] if name.strip()]
    if not alternates:
        return 'null'
    return optional_vector(alternates)

def indigenous_name(key, map):
    """Return the indigenous name for *key* as a JSON value.

    Args:
        key: Short language code; anything that is not a ``str`` (such as
            the NaN of an empty cell) counts as "no code".
        map: Mapping from short code to a dict with an ``indigenous`` entry.
            The parameter name shadows the builtin but is kept so existing
            keyword callers keep working.

    Returns:
        A quoted, escaped JSON string, or ``'null'`` when the code is
        missing, unknown, or has no indigenous name recorded.
    """
    if not isinstance(key, str) or key not in map:
        return 'null'
    name = map[key]['indigenous']
    # An empty cell is NaN (a float); emit null rather than crash in clean().
    if not isinstance(name, str):
        return 'null'
    return '"%s"' % clean(name)

def optional_string(s):
    """Render *s* as a quoted JSON string, or ``'null'`` if it is not a str.

    The text is inserted verbatim between double quotes; no escaping is
    applied.
    """
    if not isinstance(s, str):
        return 'null'
    return '"%s"' % s

def optional_vector(v):
    """Render the sequence *v* as a JSON array of strings.

    Each element is passed through ``clean`` and wrapped in double quotes.
    An empty sequence is rendered as ``'null'`` rather than ``[]``.
    """
    if len(v) > 0:
        quoted = ['"%s"' % clean(item) for item in v]
        return '[%s]' % ','.join(quoted)
    return 'null'

# Script body: the first command-line argument is the directory that
# receives languages.json. The input tables are the ones whose file names
# carry the 20190408 date suffix.
if len(sys.argv) >= 2:
    write_data_out(
        *read_data_from('20190408'),
        sys.argv[1]
    )
else:
    print('Error: need a path argument')