import argparse
import operator
import os
import sys
import ujson as json
# Directory containing this script, with symlinks resolved.
this_dir = os.path.realpath(os.path.dirname(__file__))

# Make the directory two levels above this script importable so the
# ``geodata`` package imports below resolve. The path is anchored on
# ``this_dir`` rather than the current working directory, so the script
# can be launched from anywhere.
sys.path.append(os.path.realpath(os.path.join(this_dir, os.pardir, os.pardir)))
from geodata.polygons.index import *
from geodata.i18n.languages import *
from geodata.language_id.disambiguation import disambiguate_language, AMBIGUOUS_LANGUAGE, UNKNOWN_LANGUAGE, WELL_REPRESENTED_LANGUAGES
# Paths of the 'countries' and 'regional' sub-directories of LANGUAGES_DIR.
country_language_dir, regional_language_dir = [
    os.path.join(LANGUAGES_DIR, subdir) for subdir in ('countries', 'regional')
]
class LanguagePolygonIndex(RTreePolygonIndex):
    """Polygon index that annotates admin polygons with language information.

    Each indexed polygon carries a ``languages`` property (a list of
    ``{'lang': ..., 'default': ...}`` dicts) and an integer ``admin_level``
    (0 for ``adm0`` records, 1 for ``adm1`` / ``adm1_region`` records).
    The admin level of every indexed polygon is also kept in
    ``self.admin_levels`` so candidates can be ordered by level at lookup
    time.
    """

    DEFAULT_POLYS_FILENAME = 'polygons.geojson'
    ADMIN_LEVELS_FILENAME = 'admin_levels.json'

    # Property keys listed for retention on indexed polygons.
    # NOTE(review): presumably consumed by the base class to filter the
    # stored properties -- confirm against RTreePolygonIndex.
    include_only_properties = set([
        'qs_a0',
        'qs_iso_cc',
        'qs_a1',
        'qs_a1_lc',
        'qs_a1r',
        'qs_a1r_lc',
        'qs_level',
        'languages',
        'admin_level'
    ])

    @classmethod
    def create_from_shapefiles(cls,
                               admin0_shapefile,
                               admin1_shapefile,
                               admin1_region_file,
                               output_dir,
                               index_filename=None,
                               polys_filename=DEFAULT_POLYS_FILENAME):
        """Build an index from admin-0, admin-1 and admin-1-region shapefiles.

        :param admin0_shapefile: path of the shapefile with ``adm0`` records
        :param admin1_shapefile: path of the shapefile with ``adm1`` records
        :param admin1_region_file: path of the shapefile with
            ``adm1_region`` records
        :param output_dir: passed to the constructor as ``save_dir``
        :param index_filename: passed through to the constructor
        :param polys_filename: accepted for interface compatibility; not
            used by this method
        :return: the populated index (not yet saved)
        """
        init_languages()
        index = cls(save_dir=output_dir, index_filename=index_filename)

        # qs_level -> (property holding the region name,
        #              property holding the region code)
        admin1_keys = {
            'adm1': ('qs_a1', 'qs_a1_lc'),
            'adm1_region': ('qs_a1r', 'qs_a1r_lc'),
        }

        # Files are processed in this fixed order: admin 0, admin 1 regions,
        # then admin 1.
        for input_file in (admin0_shapefile, admin1_region_file, admin1_shapefile):
            # The context manager closes the collection once it has been
            # consumed instead of leaking the handle.
            with fiona.open(input_file) as f:
                for rec in f:
                    if not rec or not rec.get('geometry') or 'type' not in rec['geometry']:
                        continue

                    properties = rec['properties']
                    country = properties['qs_iso_cc'].lower()
                    admin_level = properties['qs_level']

                    if admin_level == 'adm0':
                        level_num = 0
                        languages = get_country_languages(country).items()
                    elif admin_level in admin1_keys:
                        level_num = 1
                        name_key, code_key = admin1_keys[admin_level]

                        # Look the region up by name first, then fall back
                        # to its code.
                        regional = get_regional_languages(country, name_key,
                                                          properties.get(name_key))
                        if not regional:
                            regional = get_regional_languages(country, code_key,
                                                              properties.get(code_key))

                        # Regions without regional language data are not indexed.
                        if not regional:
                            continue

                        if not any(regional.values()):
                            # No regional language is a default: start from
                            # the country's languages and layer the regional
                            # ones on top.
                            # NOTE(review): this updates the mapping returned
                            # by get_country_languages in place -- confirm
                            # that helper returns a fresh dict per call.
                            languages = get_country_languages(country)
                            languages.update(regional)
                            languages = languages.items()
                        else:
                            languages = regional.items()
                    else:
                        # Unknown admin level: skip the record.
                        continue

                    properties['languages'] = [{'lang': lang, 'default': default}
                                               for lang, default in languages]
                    properties['admin_level'] = level_num

                    geometry = rec['geometry']
                    poly_type = geometry['type']
                    if poly_type == 'Polygon':
                        # Only the first ring of the polygon is used.
                        poly = cls.to_polygon(geometry['coordinates'][0])
                        index.index_polygon(poly)
                        poly = index.simplify_polygon(poly)
                        index.add_polygon(poly, dict(properties))
                    elif poly_type == 'MultiPolygon':
                        polys = []
                        for coords in geometry['coordinates']:
                            # Only the first ring of each member polygon is used.
                            poly = cls.to_polygon(coords[0])
                            polys.append(poly)
                            index.index_polygon(poly)

                        multi_poly = index.simplify_polygon(MultiPolygon(polys))
                        index.add_polygon(multi_poly, dict(properties))
                    # Any other geometry type is ignored.

        return index

    @classmethod
    def create_with_quattroshapes(cls, quattroshapes_dir,
                                  output_dir,
                                  index_filename=None,
                                  polys_filename=DEFAULT_POLYS_FILENAME):
        """Build an index from the ``qs_adm0.shp``, ``qs_adm1.shp`` and
        ``qs_adm1_region.shp`` files found in ``quattroshapes_dir``.

        The remaining arguments are forwarded to
        :meth:`create_from_shapefiles`, whose result is returned.
        """
        admin0_filename = os.path.join(quattroshapes_dir, 'qs_adm0.shp')
        admin1_filename = os.path.join(quattroshapes_dir, 'qs_adm1.shp')
        admin1r_filename = os.path.join(quattroshapes_dir, 'qs_adm1_region.shp')

        return cls.create_from_shapefiles(admin0_filename, admin1_filename, admin1r_filename,
                                          output_dir, index_filename=index_filename,
                                          polys_filename=polys_filename)

    def setup(self):
        """Initialise the list of per-polygon admin levels."""
        self.admin_levels = []

    def index_polygon_properties(self, properties):
        """Record the ``admin_level`` of a polygon's properties."""
        self.admin_levels.append(properties['admin_level'])

    def load_polygon_properties(self, d):
        """Load the admin levels from ``ADMIN_LEVELS_FILENAME`` in directory ``d``."""
        with open(os.path.join(d, self.ADMIN_LEVELS_FILENAME)) as f:
            self.admin_levels = json.load(f)

    def save_polygon_properties(self, d):
        """Write the admin levels to ``ADMIN_LEVELS_FILENAME`` in directory ``d``."""
        # Closing the file explicitly guarantees the data is flushed to disk.
        with open(os.path.join(d, self.ADMIN_LEVELS_FILENAME), 'w') as f:
            json.dump(self.admin_levels, f)

    def admin_level(self, i):
        """Return the admin level recorded at position ``i``."""
        return self.admin_levels[i]

    def get_candidate_polygons(self, lat, lon):
        """Return the de-duplicated ids of indexed entries intersecting the
        point, ordered by admin level from highest to lowest."""
        candidates = OrderedDict.fromkeys(self.index.intersection((lon, lat, lon, lat))).keys()
        return sorted(candidates, key=self.admin_level, reverse=True)

    def country_and_languages(self, latitude, longitude):
        """Look up the country and candidate languages for a point.

        :return: ``(country, languages, props)`` where ``country`` is the
            lower-cased ``qs_iso_cc`` of the first matching polygon,
            ``languages`` is a list of ``{'lang', 'default'}`` dicts with
            defaults first, and ``props`` is what ``point_in_poly``
            returned. ``(None, None, None)`` when nothing matches.
        """
        props = self.point_in_poly(latitude, longitude, return_all=True)
        if not props:
            return None, None, None

        country = props[0]['qs_iso_cc'].lower()
        languages = []
        language_set = set()

        have_regional = False

        for p in props:
            for l in p['languages']:
                lang = l['lang']
                if lang in language_set:
                    # Keep only the first occurrence of each language.
                    continue
                language_set.add(lang)
                if p['admin_level'] > 0 and l['default']:
                    have_regional = True
                elif have_regional:
                    # Once a regional default has been seen, every other
                    # newly added language is demoted to non-default.
                    l = {'lang': lang, 'default': 0}
                languages.append(l)

        # sorted() is stable, so among equal 'default' values the original
        # order is preserved.
        default_languages = sorted(languages, key=operator.itemgetter('default'), reverse=True)
        return country, default_languages, props

    def best_country_and_language(self, latitude, longitude, name):
        """Pick the country and the single best language for ``name`` at a point.

        :return: ``(country, language)``. ``language`` may be
            ``UNKNOWN_LANGUAGE`` or ``AMBIGUOUS_LANGUAGE``. ``(None, None)``
            when the point matches no polygon or has no candidate languages.
        """
        country, candidate_languages, language_props = self.country_and_languages(latitude, longitude)
        if not (country and candidate_languages):
            return None, None

        # A single candidate needs no disambiguation.
        if len(candidate_languages) == 1:
            return country, candidate_languages[0]['lang']

        default_langs = set([l['lang'] for l in candidate_languages if l.get('default')])
        num_defaults = len(default_langs)

        regional_defaults = 0
        country_defaults = 0
        country_langs = set()
        for p in language_props:
            defaults_here = sum((1 for l in p['languages'] if l.get('default')))
            if p['admin_level'] > 0:
                regional_defaults += defaults_here
            else:
                country_defaults += defaults_here
                country_langs |= set([l['lang'] for l in p['languages']])

        lang = disambiguate_language(name, [(l['lang'], l['default']) for l in candidate_languages])
        default_lang = candidate_languages[0]['lang']

        # Nothing detected but exactly one default: fall back to it.
        if lang == UNKNOWN_LANGUAGE and num_defaults == 1:
            return country, default_lang

        if lang != AMBIGUOUS_LANGUAGE and lang != UNKNOWN_LANGUAGE:
            # A concrete, well-represented country-level language other than
            # the top candidate is reported as unknown when the country has
            # several defaults and a regional default exists.
            if (lang != default_lang and lang in country_langs and
                    country_defaults > 1 and regional_defaults > 0 and
                    lang in WELL_REPRESENTED_LANGUAGES):
                return country, UNKNOWN_LANGUAGE

        return country, lang
if __name__ == '__main__':
    # Command-line entry point: build the index from a quattroshapes
    # directory and save it to the output directory.
    parser = argparse.ArgumentParser()

    # Required: without it the shapefile paths cannot be built, so let
    # argparse report a usage error instead of failing later in os.path.join.
    parser.add_argument('-q', '--quattroshapes-dir',
                        required=True,
                        help='Path to quattroshapes dir')

    parser.add_argument('-o', '--out-dir',
                        default=os.getcwd(),
                        help='Output directory')

    args = parser.parse_args()

    index = LanguagePolygonIndex.create_with_quattroshapes(args.quattroshapes_dir, args.out_dir)
    index.save()