/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
* <p>
* http://www.apache.org/licenses/LICENSE-2.0
* <p>
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
syntax = "proto3";
package lance.index.pb;
// Kind of index.
enum IndexType {
  // Vector index. Being value 0, this is also what an unset `IndexType`
  // field reads as under proto3 default-value rules.
  VECTOR = 0;
}
// Metadata describing a single index.
message Index {
  // Name of the index; unique within the dataset.
  string name = 1;

  // Columns used to build the index.
  repeated string columns = 2;

  // Version of the dataset that this index was built from.
  uint64 dataset_version = 3;

  // The [`IndexType`] of this index.
  IndexType index_type = 4;

  // Index implementation details. As a oneof, at most one member is set.
  oneof implementation {
    // Details for a vector index.
    VectorIndex vector_index = 5;
  }
}
// Metadata of an IVF (Inverted File) index.
message IVF {
  // Partition centroids, flattened: `dimension * num_partitions` float32
  // values in total.
  repeated float centroids = 1;

  // File offset of each partition.
  repeated uint64 offsets = 2;

  // Number of records in each partition.
  // NOTE(review): presumably index-aligned with `offsets` (one entry per
  // partition) -- confirm against the writer.
  repeated uint32 lengths = 3;
}
// Product Quantization (PQ) parameters and codebook.
message PQ {
  // Number of bits used to represent a centroid.
  uint32 num_bits = 1;

  // Number of sub-vectors each vector is split into.
  uint32 num_sub_vectors = 2;

  // Dimension of the vectors.
  uint32 dimension = 3;

  // The codebook, flattened: `dimension * 2 ^ num_bits` float32 values.
  repeated float codebook = 4;
}
// Kind of transform described by a `Transform` message.
enum TransformType {
  // OPQ transform. Being value 0, this is also what an unset
  // `TransformType` field reads as under proto3 default-value rules.
  OPQ = 0;
}
// A transform matrix that is applied to a vector or to a batch of vectors.
message Transform {
  // File offset at which the matrix is stored.
  uint64 position = 1;

  // Shape of the matrix data, as `[rows, cols]`.
  repeated uint32 shape = 2;

  // Which kind of transform this is.
  TransformType type = 3;
}
// Flat index. The message declares no fields.
message Flat {}
// DiskANN index metadata.
message DiskAnn {
  // Version of the graph specification.
  uint32 spec = 1;

  // Name of the graph file.
  string filename = 2;

  // The `r` parameter.
  uint32 r = 3;

  // The `alpha` parameter.
  float alpha = 4;

  // The `L` parameter.
  uint32 L = 5;

  // Entry points into the graph.
  repeated uint64 entries = 6;
}
// A single stage of the vector index pipeline.
message VectorIndexStage {
  // The concrete stage. As a oneof, at most one member is set.
  oneof stage {
    // Flat index.
    Flat flat = 1;

    // `IVF` - Inverted File.
    IVF ivf = 2;

    // Product Quantization.
    PQ pq = 3;

    // Transform stage.
    Transform transform = 4;

    // DiskANN.
    DiskAnn diskann = 5;
  }
}
// Distance metric used by a vector index.
enum VectorMetricType {
  // L2 (Euclidean) distance. Being value 0, this is also what an unset
  // `VectorMetricType` field reads as under proto3 default-value rules.
  L2 = 0;

  // Cosine distance.
  Cosine = 1;

  // Dot product.
  Dot = 2;
}
// Metadata of a vector index.
message VectorIndex {
  // Version of the index specification.
  uint32 spec_version = 1;

  // Dimension of the indexed vectors.
  uint32 dimension = 2;

  // The stages that compose this vector index.
  //
  // For example, an `IVF_PQ` index type can be expressed as:
  //
  // ```no_run,ignore
  // let stages = vec![Ivf{}, PQ{num_bits: 8, num_sub_vectors: 16}]
  // ```
  repeated VectorIndexStage stages = 3;

  // Distance metric type used by the index.
  VectorMetricType metric_type = 4;
}