kodama-capi 0.1.2

A C API for the kodama agglomerative hierarchical clustering library.
Documentation
#ifndef _KODAMA_H
#define _KODAMA_H

#include <stdlib.h>

#ifdef __cplusplus
extern "C" {
#endif

/**
 * A stepwise dendrogram.
 *
 * A dendrogram consists of a series of `N - 1` steps, where `N` is the number
 * of observations that were clustered.
 *
 * Users of this library responsible for freeing the resources associated with
 * a dendrogram using `kodama_dendrogram_free`.
 */
typedef struct kodama_dendrogram kodama_dendrogram;

/**
 * A single step in a dendrogram.
 *
 * Each step corresponds to the creation of a new cluster by merging two
 * previous clusters.
 *
 * By convention, the smaller cluster label is always assigned to the
 * `cluster1` field.
 */
typedef struct kodama_step {
    /* The label corresponding to the first cluster. */
    size_t cluster1;
    /* The label corresponding to the second cluster. */
    size_t cluster2;
    /* The dissimilarity between cluster1 and cluster2. */
    double dissimilarity;
    /* The total number of observations in this merged cluster. */
    size_t size;
} kodama_step;

/**
 * A method for computing dissimilarities between clusters.
 *
 * The method dictates how the dissimilarities are computed whenever a new
 * cluster is formed. In particular, when clusters `a` and `b` are merged into
 * a new cluster `ab`, then the pairwise dissimilarity between `ab` and every
 * other cluster is computed using one of the variants of this type.
 */
typedef enum kodama_method {
    kodama_method_single,
    kodama_method_complete,
    kodama_method_average,
    kodama_method_weighted,
    kodama_method_ward,
    kodama_method_centroid,
    kodama_method_median,
} kodama_method;

/**
 * Return a hierarchical clustering of observations given their pairwise
 * dissimilarities as double-precision floating point numbers.
 *
 * The pairwise dissimilarities must be provided as a *condensed pairwise
 * dissimilarity matrix*, where only the values in the upper triangle are
 * explicitly represented, not including the diagonal. As a result, the given
 * matrix should have length `observations-choose-2` (which is
 * `(observations * (observations - 1)) / 2`) and only have values defined for
 * pairs of `(a, b)` where `a < b`.
 *
 * `observations` is the total number of observations that are being clustered.
 * Every pair of observations must have a finite non-NaN dissimilarity.
 *
 * The return value is dendrogram that the caller is responsible for freeing.
 * The dendrogram encodes a hierarchical clustering as a sequence of
 * `observations - 1` steps, where each step corresponds to the creation of
 * a cluster by merging exactly two previous clusters. The very last cluster
 * created contains all observations.
 *
 * Note that it is an unchecked runtime error to provide a matrix with a length
 * that is consistent with the number of observations.
 */
kodama_dendrogram *kodama_linkage_double(
    double *condensed_dissimilarity_matrix,
    size_t observations,
    kodama_method method);

/**
 * Return a hierarchical clustering of observations given their pairwise
 * dissimilarities as single-precision floating point numbers.
 *
 * The pairwise dissimilarities must be provided as a *condensed pairwise
 * dissimilarity matrix*, where only the values in the upper triangle are
 * explicitly represented, not including the diagonal. As a result, the given
 * matrix should have length `observations-choose-2` (which is
 * `(observations * (observations - 1)) / 2`) and only have values defined for
 * pairs of `(a, b)` where `a < b`.
 *
 * `observations` is the total number of observations that are being clustered.
 * Every pair of observations must have a finite non-NaN dissimilarity.
 *
 * The return value is dendrogram that the caller is responsible for freeing.
 * The dendrogram encodes a hierarchical clustering as a sequence of
 * `observations - 1` steps, where each step corresponds to the creation of
 * a cluster by merging exactly two previous clusters. The very last cluster
 * created contains all observations.
 *
 * Note that it is an unchecked runtime error to provide a matrix with a length
 * that is consistent with the number of observations.
 */
kodama_dendrogram *kodama_linkage_float(
    float *condensed_dissimilarity_matrix,
    size_t observations,
    kodama_method method);

/**
 * Returns the total number of steps in the given dendrogram.
 */
size_t kodama_dendrogram_len(const kodama_dendrogram *dend);

/**
 * Returns the total number of observations that are clustered in this
 * dendrogram.
 *
 * This is always equivalent to the number of observations provided in the
 * linkage clustering functions.
 */
size_t kodama_dendrogram_observations(const kodama_dendrogram *dend);

/**
 * Return an array of steps that make up the given dendrogram.
 *
 * The array returned should NOT be freed by the caller. Instead, the resources
 * consumed by the array should be freed by freeing the entire dendrogram.
 */
kodama_step *kodama_dendrogram_steps(const kodama_dendrogram *dend);

/**
 * Free the resources associated with the given dendrogram.
 *
 * A dendrogram cannot be used after it has been freed.
 */
void kodama_dendrogram_free(kodama_dendrogram *dend);

#ifdef __cplusplus
}
#endif

#endif