#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include "log/log.h"
#include "sparkey/sparkey.h"
#include "collections.h"
#include "constants.h"
#include "libpostal_config.h"
#include "file_utils.h"
#include "gazetteers.h"
#include "geonames.h"
#include "geodb.h"
#include "geo_disambiguation.h"
#include "graph.h"
#include "graph_builder.h"
#include "msgpack_utils.h"
#include "normalize.h"
#include "string_utils.h"
#include "geonames_fields.h"
#include "postal_fields.h"
/* Default location of the geonames TSV: the geonames data directory joined
 * with "geonames.tsv". No trailing semicolon, so the macro can be used inside
 * expressions and argument lists. */
#define DEFAULT_GEONAMES_TSV LIBPOSTAL_GEONAMES_DIR PATH_SEPARATOR "geonames.tsv"
/* Parses an optional integer flag column. An empty or unparsable token
 * yields false; any non-zero integer yields true. */
static bool geoname_flag_from_token(const char *token) {
    int flag = 0;
    if (token[0] != '\0' && sscanf(token, "%d", &flag) != 1) {
        flag = 0;
    }
    return flag != 0;
}

/*
 * Populates *g from one tab-separated line of the geonames TSV.
 *
 * g is cleared first, then every column is copied or parsed into the
 * matching field. Optional numeric columns that are empty or unparsable
 * are stored as 0 (0.0 for coordinates, false for flags).
 *
 * Returns true on success. Returns false (after logging) when the line
 * cannot be split, has the wrong number of fields, or has a missing or
 * non-numeric geonames id. On failure g may be partially populated.
 */
static bool read_geoname_from_line(geoname_t *g, char *line) {
    size_t token_count;
    char *token;

    geoname_clear(g);

    cstring_array *tokens = cstring_array_split(line, TAB_SEPARATOR, TAB_SEPARATOR_LEN, &token_count);
    if (tokens == NULL) return false;

    if (token_count != NUM_GEONAMES_FIELDS) {
        log_error("Number of fields (%zu) != expected (%d)\n", token_count, NUM_GEONAMES_FIELDS);
        goto exit_geoname_free_tokens;
    }

    token = cstring_array_get_string(tokens, GEONAMES_ID);
    if (strlen(token) == 0) {
        log_error("geonames_id is required\n");
        goto exit_geoname_free_tokens;
    }
    if (sscanf(token, "%d", &g->geonames_id) != 1) {
        log_error("geonames_id is not numeric: %s\n", token);
        goto exit_geoname_free_tokens;
    }

    token = cstring_array_get_string(tokens, GEONAMES_CANONICAL);
    char_array_cat(g->canonical, token);

    token = cstring_array_get_string(tokens, GEONAMES_BOUNDARY_TYPE);
    sscanf(token, "%d", (int *)&g->type);

    token = cstring_array_get_string(tokens, GEONAMES_NAME);
    char_array_cat(g->name, token);

    token = cstring_array_get_string(tokens, GEONAMES_ISO_LANGUAGE);
    char_array_cat(g->iso_language, token);

    g->has_wikipedia_entry = geoname_flag_from_token(cstring_array_get_string(tokens, GEONAMES_HAS_WIKIPEDIA_ENTRY));
    g->is_preferred_name = geoname_flag_from_token(cstring_array_get_string(tokens, GEONAMES_IS_PREFERRED_NAME));
    g->is_short_name = geoname_flag_from_token(cstring_array_get_string(tokens, GEONAMES_IS_SHORT_NAME));
    g->is_colloquial = geoname_flag_from_token(cstring_array_get_string(tokens, GEONAMES_IS_COLLOQUIAL));
    g->is_historical = geoname_flag_from_token(cstring_array_get_string(tokens, GEONAMES_IS_HISTORICAL));

    token = cstring_array_get_string(tokens, GEONAMES_POPULATION);
    if (strlen(token) == 0 || sscanf(token, "%d", &g->population) != 1) {
        g->population = 0;
    }

    token = cstring_array_get_string(tokens, GEONAMES_LATITUDE);
    if (strlen(token) == 0 || sscanf(token, "%lf", &g->latitude) != 1) {
        /* The empty case must reset latitude, not longitude. */
        g->latitude = 0.0;
    }

    token = cstring_array_get_string(tokens, GEONAMES_LONGITUDE);
    if (strlen(token) == 0 || sscanf(token, "%lf", &g->longitude) != 1) {
        g->longitude = 0.0;
    }

    token = cstring_array_get_string(tokens, GEONAMES_FEATURE_CODE);
    char_array_cat(g->feature_code, token);

    token = cstring_array_get_string(tokens, GEONAMES_COUNTRY_CODE);
    char_array_cat(g->country_code, token);

    token = cstring_array_get_string(tokens, GEONAMES_COUNTRY_ID);
    if (strlen(token) == 0 || sscanf(token, "%d", &g->country_geonames_id) != 1) {
        g->country_geonames_id = 0;
    }

    token = cstring_array_get_string(tokens, GEONAMES_ADMIN1_CODE);
    char_array_cat(g->admin1_code, token);

    token = cstring_array_get_string(tokens, GEONAMES_ADMIN1_ID);
    if (strlen(token) == 0 || sscanf(token, "%d", &g->admin1_geonames_id) != 1) {
        g->admin1_geonames_id = 0;
    }

    token = cstring_array_get_string(tokens, GEONAMES_ADMIN2_CODE);
    char_array_cat(g->admin2_code, token);

    token = cstring_array_get_string(tokens, GEONAMES_ADMIN2_ID);
    if (strlen(token) == 0 || sscanf(token, "%d", &g->admin2_geonames_id) != 1) {
        g->admin2_geonames_id = 0;
    }

    token = cstring_array_get_string(tokens, GEONAMES_ADMIN3_CODE);
    char_array_cat(g->admin3_code, token);

    /* The id must be parsed from its own column, not from the admin3 code. */
    token = cstring_array_get_string(tokens, GEONAMES_ADMIN3_ID);
    if (strlen(token) == 0 || sscanf(token, "%d", &g->admin3_geonames_id) != 1) {
        g->admin3_geonames_id = 0;
    }

    token = cstring_array_get_string(tokens, GEONAMES_ADMIN4_CODE);
    char_array_cat(g->admin4_code, token);

    token = cstring_array_get_string(tokens, GEONAMES_ADMIN4_ID);
    if (strlen(token) == 0 || sscanf(token, "%d", &g->admin4_geonames_id) != 1) {
        g->admin4_geonames_id = 0;
    }

    cstring_array_destroy(tokens);
    return true;

exit_geoname_free_tokens:
    cstring_array_destroy(tokens);
    return false;
}
static bool read_gn_postal_code_from_line(gn_postal_code_t *postal, char *line) {
size_t token_count;
int i;
gn_postal_code_clear(postal);
char *token;
cstring_array *tokens = cstring_array_split(line, TAB_SEPARATOR, TAB_SEPARATOR_LEN, &token_count);
if (tokens == NULL) return false;
if (token_count != NUM_POSTAL_FIELDS) {
log_error("Number of fields (%zu) != expected (%d)\n", token_count, NUM_POSTAL_FIELDS);
goto exit_postal_tokens_created;
}
token = cstring_array_get_string(tokens, GN_POSTAL_CODE);
if (strlen(token) == 0) {
log_error("postal_code field required\n");
goto exit_postal_tokens_created;
}
char_array_cat(postal->postal_code, token);
token = cstring_array_get_string(tokens, GN_POSTAL_COUNTRY_CODE);
char_array_cat(postal->country_code, token);
token = cstring_array_get_string(tokens, GN_POSTAL_COUNTRY_GEONAMES_ID);
if (strlen(token) > 0) {
sscanf(token, "%d", &postal->country_geonames_id);
} else {
postal->country_geonames_id = 0;
}
token = cstring_array_get_string(tokens, GN_POSTAL_CONTAINING_GEONAME_ID);
char_array_cat(postal->containing_geoname, token);
char *admin1_field = cstring_array_get_string(tokens, GN_POSTAL_ADMIN1_IDS);
size_t admin1_field_len = strlen(admin1_field);
if (admin1_field_len > 0) {
size_t admin1_token_count;
cstring_array *admin1_tokens = cstring_array_split(admin1_field, COMMA_SEPARATOR, COMMA_SEPARATOR_LEN, &admin1_token_count);
uint32_t admin1_id;
if (admin1_token_count > 0) {
for (i = 0; i < admin1_token_count; i++) {
char *admin1_token = cstring_array_get_string(tokens, i);
if (strlen(admin1_token) > 0) {
sscanf(admin1_token, "%u", &admin1_id);
uint32_array_push(postal->admin1_ids, admin1_id);
}
}
}
cstring_array_destroy(admin1_tokens);
}
char *admin2_field = cstring_array_get_string(tokens, GN_POSTAL_ADMIN2_IDS);
size_t admin2_field_len = strlen(admin2_field);
if (admin2_field_len > 0) {
size_t admin2_token_count;
cstring_array *admin2_tokens = cstring_array_split(admin2_field, COMMA_SEPARATOR, COMMA_SEPARATOR_LEN, &admin2_token_count);
uint32_t admin2_id;
if (admin2_token_count > 0) {
for (i = 0; i < admin2_token_count; i++) {
char *admin2_token = cstring_array_get_string(admin2_tokens, i);
if (strlen(admin2_token) > 0) {
sscanf(admin2_token, "%u", &admin2_id);
uint32_array_push(postal->admin2_ids, admin2_id);
}
}
}
cstring_array_destroy(admin2_tokens);
}
char *admin3_field = cstring_array_get_string(tokens, GN_POSTAL_ADMIN3_IDS);
size_t admin3_field_len = strlen(admin3_field);
if (admin3_field_len > 0) {
size_t admin3_token_count;
cstring_array *admin3_tokens = cstring_array_split(admin3_field, COMMA_SEPARATOR, COMMA_SEPARATOR_LEN, &admin3_token_count);
uint32_t admin3_id;
if (admin3_token_count > 0) {
for (i = 0; i < admin3_token_count; i++) {
char *admin3_token = cstring_array_get_string(admin3_tokens, i);
if (strlen(admin3_token) > 0) {
sscanf(admin3_token, "%u", &admin3_id);
uint32_array_push(postal->admin3_ids, admin3_id);
}
}
}
cstring_array_destroy(admin3_tokens);
}
cstring_array_destroy(tokens);
return true;
exit_postal_tokens_created:
cstring_array_destroy(tokens);
return false;
}
/* Accumulates everything the geodb build writes out. Created by
 * geodb_builder_new(), filled by the import_* functions, written by
 * geodb_builder_finalize() and released by geodb_builder_destroy(). */
typedef struct geodb_builder {
/* Normalized name / postal code -> packed geodb_value_t (see geodb_builder_add_name). */
trie_t *names;
/* Postal code keys (country code joined with the normalized code), in import order. */
cstring_array *postal_codes;
/* Feature string -> feature id; ids are handed out in insertion order. */
trie_t *features;
/* Edges from feature ids to geonames ids / postal code indices. */
graph_builder_t *feature_graph_builder;
/* Sparkey log receiving the serialized geoname and postal code records. */
sparkey_logwriter *log_writer;
} geodb_builder_t;
/* Releases a builder and every component it still owns. Accepts NULL and
 * partially constructed builders: members that are NULL are skipped. */
void geodb_builder_destroy(geodb_builder_t *self) {
    if (self != NULL) {
        if (self->names != NULL) trie_destroy(self->names);
        if (self->postal_codes != NULL) cstring_array_destroy(self->postal_codes);
        if (self->features != NULL) trie_destroy(self->features);
        if (self->feature_graph_builder != NULL) graph_builder_destroy(self->feature_graph_builder);
        if (self->log_writer != NULL) sparkey_logwriter_close(&self->log_writer);

        free(self);
    }
}
/* Allocates a builder and its components: the names and features tries, the
 * postal code string array, a bipartite graph builder without fixed rows,
 * and an uncompressed Sparkey log writer on log_filename.
 *
 * Returns the builder, or NULL if any step fails (whatever was already
 * created is destroyed first). */
geodb_builder_t *geodb_builder_new(char *log_filename) {
    geodb_builder_t *self = calloc(1, sizeof(geodb_builder_t));
    if (self == NULL) return NULL;

    /* Short-circuit evaluation keeps the construction order and stops at the
     * first failing step. */
    if ((self->names = trie_new()) != NULL &&
        (self->features = trie_new()) != NULL &&
        (self->postal_codes = cstring_array_new()) != NULL &&
        (self->feature_graph_builder = graph_builder_new(GRAPH_BIPARTITE, false)) != NULL &&
        sparkey_logwriter_create(&self->log_writer, log_filename, SPARKEY_COMPRESSION_NONE, 0) == SPARKEY_SUCCESS) {
        return self;
    }

    geodb_builder_destroy(self);
    return NULL;
}
/* Maps a geonames boundary type to its address component flag.
 * Returns 0 for a boundary type that has no mapping. */
uint16_t get_address_component(uint32_t boundary_type) {
    /* Checked in order; the first matching boundary type wins. */
    const struct {
        uint32_t boundary;
        uint16_t component;
    } mapping[] = {
        { GEONAMES_LOCALITY, GEONAMES_ADDRESS_COMPONENT_LOCALITY },
        { GEONAMES_NEIGHBORHOOD, GEONAMES_ADDRESS_COMPONENT_NEIGHBORHOOD },
        { GEONAMES_ADMIN1, GEONAMES_ADDRESS_COMPONENT_ADMIN1 },
        { GEONAMES_COUNTRY, GEONAMES_ADDRESS_COMPONENT_COUNTRY },
        { GEONAMES_ADMIN2, GEONAMES_ADDRESS_COMPONENT_ADMIN2 },
        { GEONAMES_ADMIN3, GEONAMES_ADDRESS_COMPONENT_ADMIN3 },
        { GEONAMES_ADMIN4, GEONAMES_ADDRESS_COMPONENT_ADMIN4 },
        { GEONAMES_ADMIN_OTHER, GEONAMES_ADDRESS_COMPONENT_ADMIN_OTHER },
    };

    for (size_t entry = 0; entry < sizeof(mapping) / sizeof(mapping[0]); entry++) {
        if (mapping[entry].boundary == boundary_type) {
            return mapping[entry].component;
        }
    }
    return 0;
}
/* Records one occurrence of `key` in the names trie.
 *
 * A new key is stored with the given components, the given canonical flag
 * and a count of 1. An existing key has the components OR-ed in, its
 * canonical flag replaced by is_canonical, and its count incremented.
 *
 * Returns false when the builder or its names trie is NULL, or when the
 * trie read or write fails. */
bool geodb_builder_add_name(geodb_builder_t *self, char *key, bool is_canonical, uint16_t address_components) {
    if (self == NULL || self->names == NULL) return false;

    geodb_value_t entry;
    entry.value = 0;

    uint32_t node_id = trie_get(self->names, key);
    bool already_present = node_id != NULL_NODE_ID;

    /* Start from the stored value when the key is already in the trie. */
    if (already_present && !trie_get_data_at_index(self->names, node_id, &entry.value)) {
        return false;
    }

    entry.components |= address_components;
    entry.is_canonical = is_canonical;

    if (!already_present) {
        entry.count = 1;
        return trie_add(self->names, key, entry.value);
    }

    entry.count++;
    return trie_set_data_at_index(self->names, node_id, entry.value);
}
/* Returns the id stored for `key` in the features trie. An unseen key is
 * added with the trie's current key count as its id. Terminates the process
 * if the key cannot be added. */
static inline uint32_t geodb_builder_get_feature_id(geodb_builder_t *self, char *key) {
    uint32_t id;

    if (trie_get_data(self->features, key, &id)) {
        return id;
    }

    id = self->features->num_keys;
    if (!trie_add(self->features, key, id)) {
        log_error("Could not add key to trie, aborting\n");
        exit(EXIT_FAILURE);
    }
    return id;
}
/*
 * Writes every artifact of the build into output_dir:
 *   - the names trie and the features trie,
 *   - the postal codes file (string count, byte length, then the raw bytes),
 *   - the feature graph (this consumes self->feature_graph_builder),
 *   - the Sparkey hash built from the Sparkey log (the log writer is closed
 *     first).
 *
 * Returns true when every checked step succeeds. On any failure an error is
 * logged, all temporaries (path buffer, open file, graph, duplicated file
 * name) are released, and false is returned.
 *
 * NOTE(review): the results of the two trie_save calls are not checked,
 * matching the previous behavior; confirm whether trie_save reports failure.
 */
bool geodb_builder_finalize(geodb_builder_t *self, char *output_dir) {
    if (self == NULL || output_dir == NULL) return false;

    bool success = false;
    FILE *f = NULL;
    graph_t *graph = NULL;
    char *hash_filename = NULL;

    char_array *path = char_array_new_size(strlen(output_dir));
    if (path == NULL) {
        log_error("Could not allocate path buffer\n");
        return false;
    }

    char_array_cat_joined(path, PATH_SEPARATOR, true, 2, output_dir, GEODB_NAMES_TRIE_FILENAME);
    trie_save(self->names, char_array_get_string(path));

    char_array_clear(path);
    char_array_cat_joined(path, PATH_SEPARATOR, true, 2, output_dir, GEODB_FEATURES_TRIE_FILENAME);
    trie_save(self->features, char_array_get_string(path));

    char_array_clear(path);
    char_array_cat_joined(path, PATH_SEPARATOR, true, 2, output_dir, GEODB_POSTAL_CODES_FILENAME);

    f = fopen(char_array_get_string(path), "wb");
    if (f == NULL) {
        log_error("Could not open postal codes file for writing\n");
        goto exit_finalize;
    }

    if (!file_write_uint64(f, (uint64_t)cstring_array_num_strings(self->postal_codes))) {
        log_error("Could not write number of postal code strings\n");
        goto exit_finalize;
    }

    size_t postal_codes_str_len = self->postal_codes->str->n;

    if (!file_write_uint64(f, (uint64_t)postal_codes_str_len)) {
        log_error("Could not write postal codes strings length\n");
        goto exit_finalize;
    }

    if (!file_write_chars(f, self->postal_codes->str->a, postal_codes_str_len)) {
        log_error("Could not write postal codes strings\n");
        goto exit_finalize;
    }

    /* fclose flushes buffered data, so a failure here means a short file. */
    int close_result = fclose(f);
    f = NULL;
    if (close_result != 0) {
        log_error("Could not close postal codes file\n");
        goto exit_finalize;
    }

    bool sort_edges = false;
    bool remove_duplicates = false;

    graph = graph_builder_finalize(self->feature_graph_builder, sort_edges, remove_duplicates);
    /* The builder is cleared so geodb_builder_destroy does not touch it again. */
    self->feature_graph_builder = NULL;
    if (graph == NULL) {
        log_error("Error building graph\n");
        goto exit_finalize;
    }

    char_array_clear(path);
    char_array_cat_joined(path, PATH_SEPARATOR, true, 2, output_dir, GEODB_FEATURE_GRAPH_FILENAME);
    if (!graph_save(graph, char_array_get_string(path))) {
        log_error("Error saving graph\n");
        goto exit_finalize;
    }

    /* The hash file name is copied because `path` is reused for the log name. */
    char_array_clear(path);
    char_array_cat_joined(path, PATH_SEPARATOR, true, 2, output_dir, GEODB_HASH_FILENAME);
    hash_filename = strdup(char_array_get_string(path));
    if (hash_filename == NULL) {
        log_error("Could not allocate hash file name\n");
        goto exit_finalize;
    }

    char_array_clear(path);
    char_array_cat_joined(path, PATH_SEPARATOR, true, 2, output_dir, GEODB_LOG_FILENAME);
    char *log_filename = char_array_get_string(path);

    /* The log must be closed before the hash is built from it. */
    if (self->log_writer != NULL) {
        sparkey_logwriter_close(&self->log_writer);
        self->log_writer = NULL;
    }

    if ((sparkey_hash_write(hash_filename, log_filename, 0)) != SPARKEY_SUCCESS) {
        log_error("Could not write Sparkey hash file\n");
        goto exit_finalize;
    }

    success = true;

exit_finalize:
    if (f != NULL) {
        fclose(f);
    }
    if (graph != NULL) {
        graph_destroy(graph);
    }
    free(hash_filename);
    char_array_destroy(path);
    return success;
}
void import_geonames(geodb_builder_t *self, char *filename) {
FILE *f = fopen(filename, "r");
if (f == NULL) {
printf("Couldn't open file\n");
exit(1);
}
char *line;
char *prev_name = NULL;
geoname_t *g = geoname_new();
char_array *serialized = char_array_new();
graph_builder_t *features = self->feature_graph_builder;
khash_t(int_set) *all_ids = kh_init(int_set);
khash_t(int_set) *distinct_ids = kh_init(int_set);
khiter_t key;
int ret;
cstring_array *geo_features = cstring_array_new();
uint32_array *feature_lengths = uint32_array_new();
char id_string[INT32_MAX_STRING_SIZE + 1];
int normalize_utf8_options = NORMALIZE_STRING_COMPOSE | NORMALIZE_STRING_LOWERCASE | NORMALIZE_STRING_TRIM;
int i = 0;
int ambiguous = 0;
int disambiguations = 0;
while ((line = file_getline(f)) != NULL) {
read_geoname_from_line(g, line);
char *name = char_array_get_string(g->name);
char *canonical = char_array_get_string(g->canonical);
bool is_canonical = strcmp(name, canonical) == 0;
char *utf8_normalized = NULL;
size_t id_len = sprintf(id_string, "%d", g->geonames_id);
if (name != NULL) {
utf8_normalized = normalize_string_utf8(name, normalize_utf8_options);
}
if (utf8_normalized != NULL && (prev_name == NULL || strcmp(utf8_normalized, prev_name) != 0)) {
if (!geodb_builder_add_name(self, utf8_normalized, is_canonical, get_address_component(g->type))) {
log_error("Error adding geoname %s\n", utf8_normalized);
exit(EXIT_FAILURE);
}
if (kh_size(distinct_ids) > 1) {
ambiguous++;
uint32_t string_index = 0;
uint32_t lengths_index = 0;
uint32_t geonames_id;
kh_foreach_key(distinct_ids, key, {
disambiguations++;
uint32_t length = feature_lengths->a[lengths_index];
for (int i = 0; i < length; i++) {
char *token = cstring_array_get_string(geo_features, string_index);
uint32_t feature_id = geodb_builder_get_feature_id(self, token);
graph_builder_add_edge(self->feature_graph_builder, feature_id, geonames_id);
string_index++;
}
lengths_index++;
})
}
uint32_array_clear(feature_lengths);
cstring_array_clear(geo_features);
kh_clear(int_set, distinct_ids);
} else if (utf8_normalized != NULL) {
key = kh_get(int_set, distinct_ids, g->geonames_id);
if (key == kh_end(distinct_ids)) {
if (!geodb_builder_add_name(self, utf8_normalized, is_canonical, get_address_component(g->type))) {
log_error("Error adding geoname %s\n", utf8_normalized);
exit(EXIT_FAILURE);
}
}
} else {
log_error("normalization failed for name %s\n", name);
exit(EXIT_FAILURE);
}
char_array_clear(serialized);
if (!geoname_serialize(g, serialized)) {
log_error("geoname_serialize failed for id=%d\n", g->geonames_id);
exit(EXIT_FAILURE);
}
key = kh_get(int_set, all_ids, g->geonames_id);
if (key == kh_end(all_ids)) {
if ((sparkey_logwriter_put(self->log_writer, strlen(id_string), (uint8_t *)id_string, serialized->n, (uint8_t *)char_array_get_string(serialized))) != SPARKEY_SUCCESS) {
log_error("Error writing to Sparkey with id=%d\n", g->geonames_id);
exit(EXIT_FAILURE);
}
key = kh_put(int_set, all_ids, g->geonames_id, &ret);
}
key = kh_get(int_set, distinct_ids, g->geonames_id);
if (key == kh_end(distinct_ids)) {
key = kh_put(int_set, distinct_ids, g->geonames_id, &ret);
if (ret < 0) {
log_error("Error adding id %d to set\n", g->geonames_id);
exit(EXIT_FAILURE);
}
char_array_clear(g->name);
char_array_cat(g->name, utf8_normalized);
size_t prev_num_geo_features = cstring_array_num_strings(geo_features);
if (!geodisambig_add_geoname_features(geo_features, g)) {
log_error("Could not add geonames features for id=%d\n", g->geonames_id);
exit(EXIT_FAILURE);
}
uint32_t num_geo_features = cstring_array_num_strings(geo_features);
uint32_t feature_length = (uint32_t)(num_geo_features - prev_num_geo_features);
uint32_array_push(feature_lengths, feature_length);
}
if (prev_name != NULL) {
free(prev_name);
prev_name = NULL;
}
if (utf8_normalized != NULL) {
prev_name = utf8_normalized;
}
free(line);
i++;
if (i % 1000 == 0) {
log_info("Did %d geonames, %d ambiguous, %d disambiguations, names=%d, features=%d\n", i, ambiguous, disambiguations, self->names->num_keys, self->features->num_keys);
}
}
if (prev_name != NULL) {
free(prev_name);
}
uint32_array_destroy(feature_lengths);
cstring_array_destroy(geo_features);
kh_destroy(int_set, distinct_ids);
kh_destroy(int_set, all_ids);
char_array_destroy(serialized);
geoname_destroy(g);
fclose(f);
}
/*
 * Imports the postal codes TSV at `filename` into the builder.
 *
 * For every line:
 *   - the lowercased postal code is added to the names trie as a canonical
 *     postcode component,
 *   - the key (country code joined with the normalized code) is appended to
 *     the builder's postal code strings, and the serialized record is
 *     written to the Sparkey log under that key,
 *   - a feature -> postal code index edge is added to the feature graph for
 *     each postal code feature.
 *
 * Any failure (unreadable file, malformed line, allocation, trie, or
 * Sparkey error) is logged and terminates the process, matching
 * import_geonames.
 */
void import_geonames_postal_codes(geodb_builder_t *self, char *filename) {
    if (self == NULL || filename == NULL) {
        log_error("import_geonames_postal_codes requires a builder and a filename\n");
        exit(EXIT_FAILURE);
    }

    FILE *f = fopen(filename, "r");
    if (f == NULL) {
        printf("Couldn't open file\n");
        exit(1);
    }

    char *line;

    gn_postal_code_t *pc = gn_postal_code_new();
    char_array *postal_code = char_array_new();
    char_array *serialized = char_array_new();
    cstring_array *postal_code_features = cstring_array_new();

    if (pc == NULL || postal_code == NULL || serialized == NULL || postal_code_features == NULL) {
        log_error("Could not allocate postal code import buffers\n");
        exit(EXIT_FAILURE);
    }

    int i = 0;
    bool is_canonical = true;

    while ((line = file_getline(f)) != NULL) {
        if (!read_gn_postal_code_from_line(pc, line)) {
            log_error("Error reading line: %s\n", line);
            exit(EXIT_FAILURE);
        }

        char *code = char_array_get_string(pc->postal_code);
        char *utf8_normalized = normalize_string_utf8(code, NORMALIZE_STRING_LOWERCASE);
        if (utf8_normalized == NULL) {
            log_error("normalization failed for postal code %s\n", code);
            exit(EXIT_FAILURE);
        }

        if (!geodb_builder_add_name(self, utf8_normalized, is_canonical, GEONAMES_ADDRESS_COMPONENT_POSTCODE)) {
            log_error("Error adding postal code %s\n", utf8_normalized);
            exit(EXIT_FAILURE);
        }

        /* Serialized before pc->postal_code is replaced by the normalized form. */
        char_array_clear(serialized);
        if (!gn_postal_code_serialize(pc, serialized)) {
            log_error("gn_postal_code_serialize failed for postal code=%s\n", code);
            exit(EXIT_FAILURE);
        }

        char *country_code = char_array_get_string(pc->country_code);
        char_array_clear(postal_code);
        char_array_cat_joined(postal_code, NAMESPACE_SEPARATOR_CHAR, false, 2, country_code, utf8_normalized);
        char *postal_key = char_array_get_string(postal_code);

        cstring_array_add_string(self->postal_codes, postal_key);
        /* NOTE(review): this is the string count after the insertion, i.e.
         * one past the zero-based position of postal_key. Left unchanged;
         * confirm that readers of the graph expect this numbering. */
        uint32_t postal_code_index = (uint32_t)cstring_array_num_strings(self->postal_codes);

        cstring_array_clear(postal_code_features);

        /* Features are generated from the normalized code. `code` must not
         * be used after this point: its buffer is rewritten here. */
        char_array_clear(pc->postal_code);
        char_array_cat(pc->postal_code, utf8_normalized);

        if (sparkey_logwriter_put(self->log_writer, strlen(postal_key), (uint8_t *)postal_key, serialized->n, (uint8_t *)char_array_get_string(serialized)) != SPARKEY_SUCCESS) {
            log_error("Error writing key %s to Sparkey\n", postal_key);
            exit(EXIT_FAILURE);
        }

        if (!geodisambig_add_postal_code_features(postal_code_features, pc)) {
            log_error("Could not add geonames features for postal code=%s\n", utf8_normalized);
            exit(EXIT_FAILURE);
        }

        size_t num_features = cstring_array_num_strings(postal_code_features);
        for (size_t feature_index = 0; feature_index < num_features; feature_index++) {
            char *token = cstring_array_get_string(postal_code_features, feature_index);
            uint32_t feature_id = geodb_builder_get_feature_id(self, token);
            graph_builder_add_edge(self->feature_graph_builder, feature_id, postal_code_index);
        }

        free(utf8_normalized);
        free(line);

        i++;
        if (i % 1000 == 0) {
            log_info("Did %d postal codes\n", i);
        }
    }

    char_array_destroy(postal_code);
    char_array_destroy(serialized);
    cstring_array_destroy(postal_code_features);
    gn_postal_code_destroy(pc);
    fclose(f);
}
/*
 * Builds the geodb.
 *
 * Usage: <program> [input_dir output_dir]
 * With fewer than two arguments the compiled-in geonames and geodb
 * directories are used. Reads geonames.tsv and postal_codes.tsv from
 * input_dir and writes the log, hash, tries, postal codes and feature graph
 * to output_dir.
 *
 * Returns EXIT_SUCCESS on success and EXIT_FAILURE if setup or finalization
 * fails (the import steps terminate the process themselves on error).
 */
int main(int argc, char **argv) {
    char *input_dir;
    char *output_dir;

    if (argc > 2) {
        input_dir = argv[1];
        output_dir = argv[2];
    } else {
        input_dir = LIBPOSTAL_GEONAMES_DIR;
        output_dir = LIBPOSTAL_GEODB_DIR;
    }

    char *geonames_filename = "geonames.tsv";
    char *postal_codes_filename = "postal_codes.tsv";

    char_array *path = char_array_new_size(strlen(input_dir));
    if (path == NULL) {
        log_error("Could not allocate path buffer\n");
        return EXIT_FAILURE;
    }

    /* Copied because `path` is reused for the log file name below. */
    char_array_cat_joined(path, PATH_SEPARATOR, true, 2, input_dir, geonames_filename);
    char *geonames_path = strdup(char_array_get_string(path));
    if (geonames_path == NULL) {
        log_error("Could not allocate geonames path\n");
        char_array_destroy(path);
        return EXIT_FAILURE;
    }

    char_array_clear(path);
    char_array_cat_joined(path, PATH_SEPARATOR, true, 2, output_dir, GEODB_LOG_FILENAME);
    char *log_filename = char_array_get_string(path);

    geodb_builder_t *builder = geodb_builder_new(log_filename);
    if (builder == NULL) {
        log_error("Could not create geodb builder with log file %s\n", log_filename);
        free(geonames_path);
        char_array_destroy(path);
        return EXIT_FAILURE;
    }

    import_geonames(builder, geonames_path);
    free(geonames_path);

    printf("\n\n");

    char_array_clear(path);
    char_array_cat_joined(path, PATH_SEPARATOR, true, 2, input_dir, postal_codes_filename);
    char *postal_codes_path = char_array_get_string(path);

    log_info("Doing postal_codes\n");
    import_geonames_postal_codes(builder, postal_codes_path);
    char_array_destroy(path);

    bool finalized = geodb_builder_finalize(builder, output_dir);
    geodb_builder_destroy(builder);

    return finalized ? EXIT_SUCCESS : EXIT_FAILURE;
}