Trait abd_clam::Dataset

source ·

pub trait Dataset<I: Instance, U: Number>: Debug + Send + Sync + Index<usize, Output = I> + Clone {
    // Required methods
    fn type_name() -> String;
    fn name(&self) -> &str;
    fn cardinality(&self) -> usize;
    fn is_metric_expensive(&self) -> bool;
    fn metric(&self) -> fn(_: &I, _: &I) -> U;
    fn set_permuted_indices(&mut self, indices: Option<&[usize]>);
    fn swap(&mut self, left: usize, right: usize) -> Result<(), String>;
    fn permuted_indices(&self) -> Option<&[usize]>;
    fn make_shards(self, max_cardinality: usize) -> Vec<Self>
       where Self: Sized;
    fn save(&self, path: &Path) -> Result<(), String>;
    fn load(
        path: &Path,
        metric: fn(_: &I, _: &I) -> U,
        is_expensive: bool,
    ) -> Result<Self, String>
       where Self: Sized;

    // Provided methods
    fn permute_instances(&mut self, permutation: &[usize]) -> Result<(), String> { ... }
    fn original_index(&self, index: usize) -> usize { ... }
    fn one_to_one(&self, left: usize, right: usize) -> U { ... }
    fn are_instances_equal(&self, left: usize, right: usize) -> bool { ... }
    fn one_to_many(&self, left: usize, right: &[usize]) -> Vec<U> { ... }
    fn many_to_many(&self, left: &[usize], right: &[usize]) -> Vec<Vec<U>> { ... }
    fn pairs(&self, index_pairs: &[(usize, usize)]) -> Vec<U> { ... }
    fn pairwise(&self, indices: &[usize]) -> Vec<Vec<U>> { ... }
    fn query_to_one(&self, query: &I, index: usize) -> U { ... }
    fn query_to_many(&self, query: &I, indices: &[usize]) -> Vec<U> { ... }
    fn choose_unique(
        &self,
        n: usize,
        indices: &[usize],
        seed: Option<u64>,
    ) -> Vec<usize> { ... }
    fn median(&self, indices: &[usize]) -> Option<usize> { ... }
    fn linear_knn(&self, query: &I, k: usize) -> Vec<(usize, U)> { ... }
    fn par_linear_knn(&self, query: &I, k: usize) -> Vec<(usize, U)> { ... }
    fn linear_rnn(&self, query: &I, radius: U) -> Vec<(usize, U)> { ... }
    fn par_linear_rnn(&self, query: &I, radius: U) -> Vec<(usize, U)> { ... }
}

Expand description

A common interface for datasets used in CLAM.

Required Methods§

source

fn type_name() -> String

Returns the name of the type of the dataset.

source

fn name(&self) -> &str

Returns the name of the dataset. This is used to identify the dataset in various places.

source

fn cardinality(&self) -> usize

Returns the number of instances in the dataset.

source

fn is_metric_expensive(&self) -> bool

Whether or not the metric is expensive to calculate.

If the metric is expensive to calculate, CLAM will enable more parallelism when calculating distances.

source

fn metric(&self) -> fn(_: &I, _: &I) -> U

Returns the metric used to calculate distances between instances.

A metric should obey the following properties:

Identity: d(x, y) = 0 <=> x = y
Non-negativity: d(x, y) >= 0
Symmetry: d(x, y) = d(y, x)

If the metric also obeys the triangle inequality, d(x, z) <= d(x, y) + d(y, z), then CLAM can make certain guarantees about the exactness of search results.

source

fn set_permuted_indices(&mut self, indices: Option<&[usize]>)

Sets the permutation of indices that was used to reorder the dataset.

This is primarily used when permuting the dataset to reorder it after building a tree.

§Arguments

indices - The permutation of indices.

source

fn swap(&mut self, left: usize, right: usize) -> Result<(), String>

Swaps the location of two instances in the dataset.

This is primarily used when permuting the dataset to reorder it after building a tree.

§Arguments

left - An index in the dataset.
right - An index in the dataset.

§Errors

If there is an error swapping the instances in the implementor.

§Panics

If either left or right is an invalid index in the dataset.

source

fn permuted_indices(&self) -> Option<&[usize]>

Returns the permutation of indices that was used to reorder the dataset.

§Returns

Some if the dataset was permuted.
None otherwise.

source

fn make_shards(self, max_cardinality: usize) -> Vec<Self>
where Self: Sized,

Makes a vector of sharded datasets from the given dataset.

Each shard will be a random subset of the dataset, and will have a cardinality of at most max_cardinality. The shards will be disjoint subsets of the dataset.

§Arguments

max_cardinality - The maximum cardinality of each shard.

source

fn save(&self, path: &Path) -> Result<(), String>

Saves the dataset to a file.

§Arguments

path - The path to the file to save the dataset to.

§Errors

If the dataset cannot be saved to the given path.

source

fn load(path: &Path, metric: fn(_: &I, _: &I) -> U, is_expensive: bool) -> Result<Self, String>
where Self: Sized,

Loads a dataset from a file.

§Arguments

path - The path to the file to load the dataset from.
metric - The metric to use for the dataset.
is_expensive - Whether or not the metric is expensive to calculate.

§Errors

If the dataset cannot be loaded from the given path.
If the dataset is not the same type as the one that was saved.
If the file was corrupted.

Provided Methods§

source

fn permute_instances(&mut self, permutation: &[usize]) -> Result<(), String>

Reorders the internal order of instances by a given permutation of indices.

§Arguments

permutation - A permutation of indices in the dataset.

§Errors

See swap.

§Panics

If any of the indices in permutation are invalid indices in the dataset.

source

fn original_index(&self, index: usize) -> usize

Get the index before the dataset was reordered. If the dataset was not reordered, this is the identity function.

source

fn one_to_one(&self, left: usize, right: usize) -> U

Calculates the distance between two indexed instances in the dataset.

§Arguments

left - An index in the dataset.
right - An index in the dataset.

§Returns

The distance between the instances at left and right.

source

fn are_instances_equal(&self, left: usize, right: usize) -> bool

Returns whether or not two indexed instances in the dataset are equal.

As per the definition of a metric, this should return true if and only if the distance between the two instances is zero.

§Arguments

left - An index in the dataset
right - An index in the dataset

§Returns

true if the instances are equal, false otherwise

source

fn one_to_many(&self, left: usize, right: &[usize]) -> Vec<U>

Returns a vector of distances.

§Arguments

left - An index in the dataset
right - A slice of indices in the dataset

§Returns

A vector of distances between the instance at left and all instances at right

source

fn many_to_many(&self, left: &[usize], right: &[usize]) -> Vec<Vec<U>>

Returns a vector of vectors of distances.

§Arguments

left - A slice of indices in the dataset.
right - A slice of indices in the dataset.

§Returns

A vector of vectors of distances between the instances at left and all instances at right

source

fn pairs(&self, index_pairs: &[(usize, usize)]) -> Vec<U>

Returns a vector of distances between the given pairs of indexed instances.

§Arguments

index_pairs - A slice of pairs of indices in the dataset.

§Returns

A vector of distances between the given pairs of instances.

source

fn pairwise(&self, indices: &[usize]) -> Vec<Vec<U>>

Returns a vector of distances between all pairs of indexed instances.

§Arguments

indices - A slice of indices in the dataset.

§Returns

A vector of vectors of distances between all pairs of instances at indices

source

fn query_to_one(&self, query: &I, index: usize) -> U

Calculates the distance between a query and an indexed instance in the dataset.

§Arguments

query - A query instance
index - An index in the dataset

§Returns

The distance between the query and the instance at index

source

fn query_to_many(&self, query: &I, indices: &[usize]) -> Vec<U>

Returns a vector of distances between a query and all indexed instances.

§Arguments

query - A query instance.
indices - A slice of indices in the dataset.

§Returns

A vector of distances between the query and all instances at indices

source

fn choose_unique(&self, n: usize, indices: &[usize], seed: Option<u64>) -> Vec<usize>

Chooses a subset of indices that are unique with respect to the metric.

§Arguments

n - The number of unique indices to choose.
indices - A slice of indices in the dataset from which to choose.
seed - An optional seed for the random number generator.

§Returns

A vector of indices that are unique with respect to the metric, i.e. no two of the returned indices refer to instances that are equal to each other.

source

fn median(&self, indices: &[usize]) -> Option<usize>

Calculates the geometric median of a set of indexed instances. Returns a value from the set of indices that is the index of the median in the dataset.

Note: This default implementation does not scale well to arbitrarily large inputs.

§Arguments

indices - A subset of indices from the dataset

§Panics

If indices is empty.

§Returns

The index of the median in the dataset, if indices is not empty.
None, if indices is empty.

source

fn linear_knn(&self, query: &I, k: usize) -> Vec<(usize, U)>

Runs linear KNN search on the dataset.

source

fn par_linear_knn(&self, query: &I, k: usize) -> Vec<(usize, U)>

Runs parallelized linear KNN search on the dataset.

source

fn linear_rnn(&self, query: &I, radius: U) -> Vec<(usize, U)>

Runs linear RNN search on the dataset.

source

fn par_linear_rnn(&self, query: &I, radius: U) -> Vec<(usize, U)>

Runs parallelized linear RNN search on the dataset.

Object Safety§

This trait is not object safe.

Implementors§

source §

Trait abd_clam::Dataset

Required Methods§

fn type_name() -> String

fn name(&self) -> &str

fn cardinality(&self) -> usize

fn is_metric_expensive(&self) -> bool

fn metric(&self) -> fn(_: &I, _: &I) -> U

fn set_permuted_indices(&mut self, indices: Option<&[usize]>)

§Arguments

fn swap(&mut self, left: usize, right: usize) -> Result<(), String>

§Arguments

§Errors

§Panics

fn permuted_indices(&self) -> Option<&[usize]>

§Returns

fn make_shards(self, max_cardinality: usize) -> Vec<Self> where Self: Sized,

§Arguments

fn save(&self, path: &Path) -> Result<(), String>

§Arguments

§Errors

fn load(path: &Path, metric: fn(_: &I, _: &I) -> U, is_expensive: bool) -> Result<Self, String> where Self: Sized,

§Arguments

§Errors

Provided Methods§

fn permute_instances(&mut self, permutation: &[usize]) -> Result<(), String>

§Arguments

§Errors

§Panics

fn original_index(&self, index: usize) -> usize

fn one_to_one(&self, left: usize, right: usize) -> U

§Arguments

§Returns

fn are_instances_equal(&self, left: usize, right: usize) -> bool

§Arguments

§Returns

fn one_to_many(&self, left: usize, right: &[usize]) -> Vec<U>

§Arguments

§Returns

fn many_to_many(&self, left: &[usize], right: &[usize]) -> Vec<Vec<U>>

§Arguments

§Returns

fn pairs(&self, index_pairs: &[(usize, usize)]) -> Vec<U>

§Arguments

§Returns

fn pairwise(&self, indices: &[usize]) -> Vec<Vec<U>>

§Arguments

§Returns

fn query_to_one(&self, query: &I, index: usize) -> U

§Arguments

§Returns

fn query_to_many(&self, query: &I, indices: &[usize]) -> Vec<U>

§Arguments

§Returns

fn choose_unique(&self, n: usize, indices: &[usize], seed: Option<u64>) -> Vec<usize>

§Arguments

§Returns

fn median(&self, indices: &[usize]) -> Option<usize>

§Arguments

§Panics

§Returns

fn linear_knn(&self, query: &I, k: usize) -> Vec<(usize, U)>

fn par_linear_knn(&self, query: &I, k: usize) -> Vec<(usize, U)>

fn linear_rnn(&self, query: &I, radius: U) -> Vec<(usize, U)>

fn par_linear_rnn(&self, query: &I, radius: U) -> Vec<(usize, U)>

Object Safety§

Implementors§

impl<I: Instance, U: Number, M: Instance> Dataset<I, U> for VecDataset<I, U, M>

Trait abd_clam::Dataset

fn make_shards(self, max_cardinality: usize) -> Vec<Self>
where Self: Sized,

fn load(path: &Path, metric: fn(_: &I, _: &I) -> U, is_expensive: bool) -> Result<Self, String>
where Self: Sized,