/*
 * libpostal-sys 0.1.1
 *
 * Low-level wrappers for libpostal address normalization (with locks to support thread-safe initialization)
 * Documentation
 */
#include "averaged_perceptron.h"

/* Magic number written first by averaged_perceptron_write and checked by
 * averaged_perceptron_read before anything else is read from the stream. */
#define PERCEPTRON_SIGNATURE 0xCBCBCBCB

/*
 * Look up `feature` in the model's feature trie.
 *
 * The id is stored through `feature_id` by trie_get_data; the return value is
 * whatever that lookup reports.
 */
static inline bool averaged_perceptron_get_feature_id(averaged_perceptron_t *self, char *feature, uint32_t *feature_id) {
    bool found = trie_get_data(self->features, feature, feature_id);
    return found;
}

/*
 * Compute per-class scores for a list of feature strings.
 *
 * Each string in `features` is looked up in the feature trie; strings for
 * which the lookup fails are skipped. For a feature that is found, the weight
 * entries in the range indptr[feature_id] .. indptr[feature_id + 1] are added
 * to the score of the class named by the matching `indices` entry.
 *
 * The result lives in self->scores, which is allocated here when it is NULL
 * or empty and zeroed on every call. The returned pointer is that same array
 * (released by averaged_perceptron_destroy), so it is overwritten by the next
 * call. Returns NULL if the score array could not be allocated.
 */
inline double_array *averaged_perceptron_predict_scores(averaged_perceptron_t *self, cstring_array *features) {
    if (self->scores == NULL || self->scores->n == 0) {
        /* Release an existing empty array before replacing it so it is not leaked. */
        if (self->scores != NULL) {
            double_array_destroy(self->scores);
        }
        self->scores = double_array_new_zeros((size_t)self->num_classes);
        if (self->scores == NULL) return NULL;
    }

    double_array_zero(self->scores->a, self->scores->n);

    double *scores = self->scores->a;

    /* `i` and `feature` are assigned by the cstring_array_foreach macro. */
    uint32_t i = 0;
    char *feature;
    uint32_t feature_id;

    uint32_t *indptr = self->weights->indptr->a;
    uint32_t *indices = self->weights->indices->a;
    double *data = self->weights->data->a;

    cstring_array_foreach(features, i, feature, {
        if (!averaged_perceptron_get_feature_id(self, feature, &feature_id)) {
            continue;
        }

        /* Offsets are uint32_t; keep the cursor unsigned to match them. */
        uint32_t end = indptr[feature_id + 1];
        for (uint32_t col = indptr[feature_id]; col < end; col++) {
            uint32_t class_id = indices[col];
            scores[class_id] += data[col];
        }
    })

    return self->scores;
}

/*
 * Compute per-class scores for a hash of feature string -> count.
 *
 * Works like averaged_perceptron_predict_scores, except that the features
 * come from the keys of `feature_counts` and every weight is multiplied by
 * the count stored for that key before being added to the class score.
 * Keys for which the trie lookup fails are skipped.
 *
 * The result lives in self->scores, which is allocated here when it is NULL
 * or empty and zeroed on every call. Returns NULL if the score array could
 * not be allocated.
 */
inline double_array *averaged_perceptron_predict_scores_counts(averaged_perceptron_t *self, khash_t(str_uint32) *feature_counts) {
    if (self->scores == NULL || self->scores->n == 0) {
        /* Release an existing empty array before replacing it so it is not leaked. */
        if (self->scores != NULL) {
            double_array_destroy(self->scores);
        }
        self->scores = double_array_new_zeros((size_t)self->num_classes);
        if (self->scores == NULL) return NULL;
    }

    double_array_zero(self->scores->a, self->scores->n);

    double *scores = self->scores->a;

    /* `feature` and `count` are assigned by the kh_foreach macro. */
    const char *feature;
    uint32_t count;
    uint32_t feature_id;

    uint32_t *indptr = self->weights->indptr->a;
    uint32_t *indices = self->weights->indices->a;
    double *data = self->weights->data->a;

    kh_foreach(feature_counts, feature, count, {
        /* The lookup helper takes a non-const char *, hence the cast. */
        if (!averaged_perceptron_get_feature_id(self, (char *)feature, &feature_id)) {
            continue;
        }

        /* Offsets are uint32_t; keep the cursor unsigned to match them. */
        uint32_t end = indptr[feature_id + 1];
        for (uint32_t col = indptr[feature_id]; col < end; col++) {
            uint32_t class_id = indices[col];
            scores[class_id] += data[col] * (double)count;
        }
    })

    return self->scores;
}


/*
 * Predict a class for a list of feature strings.
 *
 * Scores the features with averaged_perceptron_predict_scores and returns the
 * index reported by double_array_argmax over those scores, cast to uint32_t.
 */
inline uint32_t averaged_perceptron_predict(averaged_perceptron_t *self, cstring_array *features) {
    double_array *class_scores = averaged_perceptron_predict_scores(self, features);
    int64_t best_class = double_array_argmax(class_scores->a, class_scores->n);
    return (uint32_t)best_class;
}

/*
 * Predict a class for a hash of feature string -> count.
 *
 * Scores the features with averaged_perceptron_predict_scores_counts and
 * returns the index reported by double_array_argmax over those scores, cast
 * to uint32_t.
 */
inline uint32_t averaged_perceptron_predict_counts(averaged_perceptron_t *self, khash_t(str_uint32) *feature_counts) {
    double_array *class_scores = averaged_perceptron_predict_scores_counts(self, feature_counts);
    int64_t best_class = double_array_argmax(class_scores->a, class_scores->n);
    return (uint32_t)best_class;
}

/*
 * Deserialize an averaged perceptron from an open stream.
 *
 * Reads, in order: a 32-bit signature (must equal PERCEPTRON_SIGNATURE),
 * num_features, num_classes (must be non-zero), the sparse weight matrix,
 * the 64-bit length and raw bytes of the class-name buffer, and the feature
 * trie. A zeroed score array of num_classes entries is also allocated.
 *
 * Returns a newly allocated model, to be released with
 * averaged_perceptron_destroy, or NULL if `f` is NULL, the signature does not
 * match, or any read or allocation fails. The stream is not closed here.
 */
averaged_perceptron_t *averaged_perceptron_read(FILE *f) {
    if (f == NULL) return NULL;

    uint32_t signature;

    if (!file_read_uint32(f, &signature) || signature != PERCEPTRON_SIGNATURE) {
        return NULL;
    }

    /* calloc leaves every member NULL/0, so the cleanup path below is safe
     * no matter how far loading got. */
    averaged_perceptron_t *perceptron = calloc(1, sizeof(*perceptron));
    if (perceptron == NULL) return NULL;

    if (!file_read_uint32(f, &perceptron->num_features) ||
        !file_read_uint32(f, &perceptron->num_classes) ||
        perceptron->num_classes == 0) {
        /* Go through the cleanup label so the struct itself is freed. */
        goto exit_perceptron_created;
    }

    perceptron->weights = sparse_matrix_read(f);
    if (perceptron->weights == NULL) {
        goto exit_perceptron_created;
    }

    perceptron->scores = double_array_new_zeros((size_t)perceptron->num_classes);

    if (perceptron->scores == NULL) {
        goto exit_perceptron_created;
    }

    uint64_t classes_str_len;

    if (!file_read_uint64(f, &classes_str_len)) {
        goto exit_perceptron_created;
    }

    char_array *array = char_array_new_size(classes_str_len);

    if (array == NULL) {
        goto exit_perceptron_created;
    }

    if (!file_read_chars(f, array->a, classes_str_len)) {
        char_array_destroy(array);
        goto exit_perceptron_created;
    }

    array->n = classes_str_len;

    /* NOTE(review): `array` is handed to cstring_array_from_char_array and is
     * not destroyed here if that call fails — confirm who owns it on failure. */
    perceptron->classes = cstring_array_from_char_array(array);
    if (perceptron->classes == NULL) {
        goto exit_perceptron_created;
    }

    perceptron->features = trie_read(f);

    if (perceptron->features == NULL) {
        goto exit_perceptron_created;
    }

    return perceptron;

exit_perceptron_created:
    averaged_perceptron_destroy(perceptron);
    return NULL;
}

/*
 * Load an averaged perceptron from the file at `filename`.
 *
 * Opens the file in binary read mode, delegates to averaged_perceptron_read
 * and closes the file again. Returns NULL when `filename` is NULL, the file
 * cannot be opened, or reading fails.
 */
averaged_perceptron_t *averaged_perceptron_load(char *filename) {
    averaged_perceptron_t *model = NULL;

    if (filename != NULL) {
        FILE *in = fopen(filename, "rb");
        if (in != NULL) {
            model = averaged_perceptron_read(in);
            fclose(in);
        }
    }

    return model;
}

/*
 * Serialize an averaged perceptron to an open stream.
 *
 * Writes, in order: PERCEPTRON_SIGNATURE, num_features, num_classes, the
 * sparse weight matrix, the length and bytes of the class-name buffer, and
 * the feature trie. Returns false if any argument or required member is NULL
 * or if any write step fails; true otherwise. The stream is not closed here.
 */
bool averaged_perceptron_write(averaged_perceptron_t *self, FILE *f) {
    if (self == NULL || f == NULL) return false;
    if (self->weights == NULL || self->classes == NULL || self->features == NULL) return false;

    /* Header: signature followed by the two dimension fields. */
    bool header_ok = file_write_uint32(f, PERCEPTRON_SIGNATURE) &&
                     file_write_uint32(f, self->num_features) &&
                     file_write_uint32(f, self->num_classes);
    if (!header_ok) return false;

    if (!sparse_matrix_write(self->weights, f)) return false;

    /* Class names: length prefix, then the raw bytes. */
    uint64_t classes_str_len = (uint64_t) cstring_array_used(self->classes);
    if (!file_write_uint64(f, classes_str_len)) return false;
    if (!file_write_chars(f, self->classes->str->a, classes_str_len)) return false;

    if (!trie_write(self->features, f)) return false;

    return true;
}

/*
 * Save an averaged perceptron to the file at `filename`.
 *
 * Opens the file in binary write mode, delegates to averaged_perceptron_write
 * and closes the file. Returns false when either argument is NULL, the file
 * cannot be opened, writing fails, or closing the file fails.
 */
bool averaged_perceptron_save(averaged_perceptron_t *self, char *filename) {
    if (self == NULL || filename == NULL) return false;
    FILE *f = fopen(filename, "wb");
    if (f == NULL) return false;
    bool ret_val = averaged_perceptron_write(self, f);
    /* fclose flushes buffered output; if it fails the file may be incomplete,
     * so the save must not be reported as successful. */
    if (fclose(f) != 0) {
        ret_val = false;
    }
    return ret_val;
}


/*
 * Release an averaged perceptron and everything it holds.
 *
 * Each non-NULL member (feature trie, class names, weight matrix, score
 * array) is passed to its own destroy function, then the struct itself is
 * freed. A NULL `self` is a no-op.
 */
void averaged_perceptron_destroy(averaged_perceptron_t *self) {
    if (!self) return;

    if (self->features) trie_destroy(self->features);
    if (self->classes) cstring_array_destroy(self->classes);
    if (self->weights) sparse_matrix_destroy(self->weights);
    if (self->scores) double_array_destroy(self->scores);

    free(self);
}