1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84
// Copyright 2023 Lance Developers.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//! Build IVF model
use std::sync::Arc;
use arrow_array::{Array, FixedSizeListArray};
use snafu::{location, Location};
use lance_core::error::{Error, Result};
/// Parameters to build IVF partitions.
///
/// Bundles the partition count, the kmeans training settings and the optional
/// precomputed inputs. The `Default` implementation in this file supplies a
/// value for every field.
#[derive(Debug, Clone)]
pub struct IvfBuildParams {
/// Number of partitions to build.
pub num_partitions: usize,
// ---- kmeans parameters
/// Max number of iterations to train kmeans.
pub max_iters: usize,
/// Use provided IVF centroids.
///
/// `None` means no centroids were supplied by the caller.
pub centroids: Option<Arc<FixedSizeListArray>>,
/// Sampling rate used when building; the default value's comment points to
/// faiss as the origin of the number.
///
/// NOTE(review): presumably the number of training vectors sampled per
/// partition for kmeans -- confirm against the training code.
pub sample_rate: usize,
/// Optional precomputed-partitions file.
///
/// NOTE(review): presumably a path/URI to a file holding precomputed
/// partition assignments -- confirm against the consumer of this field.
/// The field name is spelled `partitons` (sic); it is part of the public
/// interface, so renaming it would break callers.
pub precomputed_partitons_file: Option<String>,
}
impl Default for IvfBuildParams {
    /// Returns the baseline build parameters: 32 partitions, at most 50
    /// kmeans iterations, a sample rate of 256, and neither centroids nor a
    /// precomputed partitions file.
    fn default() -> Self {
        const DEFAULT_NUM_PARTITIONS: usize = 32;
        const DEFAULT_MAX_ITERS: usize = 50;
        // See faiss
        const DEFAULT_SAMPLE_RATE: usize = 256;

        Self {
            precomputed_partitons_file: None,
            centroids: None,
            sample_rate: DEFAULT_SAMPLE_RATE,
            max_iters: DEFAULT_MAX_ITERS,
            num_partitions: DEFAULT_NUM_PARTITIONS,
        }
    }
}
impl IvfBuildParams {
/// Create a new instance of `IvfBuildParams`.
pub fn new(num_partitions: usize) -> Self {
Self {
num_partitions,
..Default::default()
}
}
/// Create a new instance of [`IvfBuildParams`] with centroids.
pub fn try_with_centroids(
num_partitions: usize,
centroids: Arc<FixedSizeListArray>,
) -> Result<Self> {
if num_partitions != centroids.len() {
return Err(Error::Index {
message: format!(
"IvfBuildParams::try_with_centroids: num_partitions {} != centroids.len() {}",
num_partitions,
centroids.len()
),
location: location!(),
});
}
Ok(Self {
num_partitions,
centroids: Some(centroids),
..Default::default()
})
}
}