pub trait StorageBackend: Send + Sync {
Show 37 methods
// Required methods
fn get_base(&self) -> String;
fn get_name(&self) -> String;
fn base_path(&self) -> PathBuf;
fn metadata_path(&self) -> PathBuf;
fn basepath_to_uri(&self) -> String;
async fn load_dense_from_file(
&self,
path: &Path,
) -> StorageResult<DenseMatrix<f64>>;
fn file_path(&self, key: &str) -> PathBuf;
async fn save_dense(
&self,
key: &str,
matrix: &DenseMatrix<f64>,
md_path: &Path,
) -> StorageResult<()>;
async fn load_dense(&self, key: &str) -> StorageResult<DenseMatrix<f64>>;
async fn save_sparse(
&self,
key: &str,
matrix: &CsMat<f64>,
md_path: &Path,
) -> StorageResult<()>;
async fn load_sparse(&self, key: &str) -> StorageResult<CsMat<f64>>;
async fn save_lambdas(
&self,
lambdas: &[f64],
md_path: &Path,
) -> StorageResult<()>;
async fn load_lambdas(&self) -> StorageResult<Vec<f64>>;
async fn save_index(
&self,
key: &str,
vector: &[usize],
md_path: &Path,
) -> StorageResult<()>;
async fn save_vector(
&self,
key: &str,
vector: &[f64],
md_path: &Path,
) -> StorageResult<()>;
async fn save_centroid_map(
&self,
map: &[usize],
md_path: &Path,
) -> StorageResult<()>;
async fn load_centroid_map(&self) -> StorageResult<Vec<usize>>;
async fn save_subcentroid_lambdas(
&self,
lambdas: &[f64],
md_path: &Path,
) -> StorageResult<()>;
async fn load_subcentroid_lambdas(&self) -> StorageResult<Vec<f64>>;
async fn save_subcentroids(
&self,
subcentroids: &DenseMatrix<f64>,
md_path: &Path,
) -> StorageResult<()>;
async fn load_subcentroids(&self) -> StorageResult<Vec<Vec<f64>>>;
async fn save_item_norms(
&self,
item_norms: &[f64],
md_path: &Path,
) -> StorageResult<()>;
async fn load_item_norms(&self) -> StorageResult<Vec<f64>>;
async fn save_cluster_assignments(
&self,
assignments: &[Option<usize>],
md_path: &Path,
) -> StorageResult<()>;
async fn load_cluster_assignments(
&self,
) -> StorageResult<Vec<Option<usize>>>;
async fn load_index(&self, key: &str) -> StorageResult<Vec<usize>>;
async fn load_vector(&self, key: &str) -> StorageResult<Vec<f64>>;
async fn save_dense_to_file(
data: &DenseMatrix<f64>,
path: &Path,
) -> StorageResult<()>;
// Provided methods
fn exists(path: &str) -> (bool, Option<PathBuf>) { ... }
fn path_to_uri(path: &Path) -> String { ... }
fn validate_initialized(&self, md_path: &Path) -> StorageResult<()> { ... }
fn to_dense_record_batch(
&self,
matrix: &DenseMatrix<f64>,
) -> Result<RecordBatch, StorageError> { ... }
fn from_dense_record_batch(
&self,
batch: &RecordBatch,
) -> Result<DenseMatrix<f64>, StorageError> { ... }
fn to_sparse_record_batch(
&self,
m: &CsMat<f64>,
) -> StorageResult<RecordBatch> { ... }
fn from_sparse_record_batch(
&self,
batch: RecordBatch,
expected_rows: usize,
expected_cols: usize,
) -> StorageResult<CsMat<f64>> { ... }
async fn save_metadata(
&self,
metadata: &GeneMetadata,
) -> StorageResult<PathBuf> { ... }
async fn load_metadata(&self) -> StorageResult<GeneMetadata> { ... }
}
Expand description
Async storage backend for Lance-based graph and embedding data.
This trait defines the minimal async API required to persist and reload all artifacts used by Javelin:
- Dense matrices (embeddings, eigenmaps, energy maps)
- Sparse matrices in CSR form (e.g. Laplacians, adjacency)
- Scalar vectors (eigenvalues, norms, generic f64 sequences)
- Index-like vectors (usize mappings and cluster assignments)
- Clustering metadata (centroid maps, subcentroids, lambdas)
- Global metadata describing the dataset layout and dimensions
§Initialization
Storage must be initialized before saving any data:
- Call `save_metadata()` once to write an initial `*_metadata.json`.
- Subsequent `save_*` calls validate that metadata exists and is consistent.
- `exists()` can be used to detect and reuse an existing initialized store.
Filenames are conventionally:
`<base dir>/<instance name or name id>_<key>.lance`
§Async usage
All I/O functions are async and intended to be called from a Tokio runtime.
Implementations (e.g. LanceStorage) must not create their own runtimes or
block on I/O internally.
§High-level flow
- Dense data:
  - `save_dense("raw_input", &matrix, md_path)`
  - `load_dense("raw_input")`
- Sparse data:
  - `save_sparse("laplacian", &csr, md_path)`
  - `load_sparse("laplacian")`
- Scalars and indices:
  - `save_lambdas`, `load_lambdas`
  - `save_vector`, `load_vector`
  - `save_index`, `load_index`
  - `save_centroid_map`, `load_centroid_map`
  - `save_item_norms`, `load_item_norms`
  - `save_cluster_assignments`, `load_cluster_assignments`
- Clustering structure:
  - `save_subcentroids`, `load_subcentroids`
  - `save_subcentroid_lambdas`, `load_subcentroid_lambdas`
Implementations are free to choose the on-disk layout as long as they honor these logical keys and round-trip semantics.
Required Methods§
Sourcefn metadata_path(&self) -> PathBuf
fn metadata_path(&self) -> PathBuf
Returns the metadata path.
Sourcefn basepath_to_uri(&self) -> String
fn basepath_to_uri(&self) -> String
Returns the base path as a `file://` URI string.
Sourceasync fn load_dense_from_file(
&self,
path: &Path,
) -> StorageResult<DenseMatrix<f64>>
async fn load_dense_from_file( &self, path: &Path, ) -> StorageResult<DenseMatrix<f64>>
Load initial data using columnar format from a file path.
Implementations may use this as a helper for async load_dense.
Sourcefn file_path(&self, key: &str) -> PathBuf
fn file_path(&self, key: &str) -> PathBuf
Compute the full Lance/parquet file path for a logical filetype.
Sourceasync fn save_dense(
&self,
key: &str,
matrix: &DenseMatrix<f64>,
md_path: &Path,
) -> StorageResult<()>
async fn save_dense( &self, key: &str, matrix: &DenseMatrix<f64>, md_path: &Path, ) -> StorageResult<()>
Saves a dense matrix. Requires metadata to exist.
Sourceasync fn load_dense(&self, key: &str) -> StorageResult<DenseMatrix<f64>>
async fn load_dense(&self, key: &str) -> StorageResult<DenseMatrix<f64>>
Loads a dense matrix from storage.
Sourceasync fn save_sparse(
&self,
key: &str,
matrix: &CsMat<f64>,
md_path: &Path,
) -> StorageResult<()>
async fn save_sparse( &self, key: &str, matrix: &CsMat<f64>, md_path: &Path, ) -> StorageResult<()>
Saves a sparse matrix. Requires metadata to exist.
Sourceasync fn load_sparse(&self, key: &str) -> StorageResult<CsMat<f64>>
async fn load_sparse(&self, key: &str) -> StorageResult<CsMat<f64>>
Loads a sparse matrix from storage.
Sourceasync fn save_lambdas(
&self,
lambdas: &[f64],
md_path: &Path,
) -> StorageResult<()>
async fn save_lambdas( &self, lambdas: &[f64], md_path: &Path, ) -> StorageResult<()>
Saves lambda eigenvalues. Requires metadata to exist.
Sourceasync fn load_lambdas(&self) -> StorageResult<Vec<f64>>
async fn load_lambdas(&self) -> StorageResult<Vec<f64>>
Loads lambda eigenvalues from storage.
Sourceasync fn save_index(
&self,
key: &str,
vector: &[usize],
md_path: &Path,
) -> StorageResult<()>
async fn save_index( &self, key: &str, vector: &[usize], md_path: &Path, ) -> StorageResult<()>
Saves an index-like vector of `usize` values (as opposed to the `f64` lambdas).
Sourceasync fn save_vector(
&self,
key: &str,
vector: &[f64],
md_path: &Path,
) -> StorageResult<()>
async fn save_vector( &self, key: &str, vector: &[f64], md_path: &Path, ) -> StorageResult<()>
Saves a generic `f64` sequence.
Sourceasync fn save_centroid_map(
&self,
map: &[usize],
md_path: &Path,
) -> StorageResult<()>
async fn save_centroid_map( &self, map: &[usize], md_path: &Path, ) -> StorageResult<()>
Save centroid_map (vector of usize mapping items to centroids)
Sourceasync fn load_centroid_map(&self) -> StorageResult<Vec<usize>>
async fn load_centroid_map(&self) -> StorageResult<Vec<usize>>
Load centroid_map
Sourceasync fn save_subcentroid_lambdas(
&self,
lambdas: &[f64],
md_path: &Path,
) -> StorageResult<()>
async fn save_subcentroid_lambdas( &self, lambdas: &[f64], md_path: &Path, ) -> StorageResult<()>
Save subcentroid_lambdas (tau values for subcentroids)
Sourceasync fn load_subcentroid_lambdas(&self) -> StorageResult<Vec<f64>>
async fn load_subcentroid_lambdas(&self) -> StorageResult<Vec<f64>>
Load subcentroid_lambdas
Sourceasync fn save_subcentroids(
&self,
subcentroids: &DenseMatrix<f64>,
md_path: &Path,
) -> StorageResult<()>
async fn save_subcentroids( &self, subcentroids: &DenseMatrix<f64>, md_path: &Path, ) -> StorageResult<()>
Save subcentroids (dense matrix)
Sourceasync fn load_subcentroids(&self) -> StorageResult<Vec<Vec<f64>>>
async fn load_subcentroids(&self) -> StorageResult<Vec<Vec<f64>>>
Load subcentroids
Sourceasync fn save_item_norms(
&self,
item_norms: &[f64],
md_path: &Path,
) -> StorageResult<()>
async fn save_item_norms( &self, item_norms: &[f64], md_path: &Path, ) -> StorageResult<()>
Save item norms (precomputed L2 norms for fast distance computation)
Sourceasync fn load_item_norms(&self) -> StorageResult<Vec<f64>>
async fn load_item_norms(&self) -> StorageResult<Vec<f64>>
Load item norms
Sourceasync fn save_cluster_assignments(
&self,
assignments: &[Option<usize>],
md_path: &Path,
) -> StorageResult<()>
async fn save_cluster_assignments( &self, assignments: &[Option<usize>], md_path: &Path, ) -> StorageResult<()>
Save cluster assignments (`Vec<Option<usize>>`)
Sourceasync fn load_cluster_assignments(&self) -> StorageResult<Vec<Option<usize>>>
async fn load_cluster_assignments(&self) -> StorageResult<Vec<Option<usize>>>
Load cluster assignments
Sourceasync fn load_index(&self, key: &str) -> StorageResult<Vec<usize>>
async fn load_index(&self, key: &str) -> StorageResult<Vec<usize>>
Load index or generic usize vector from storage.
async fn load_vector(&self, key: &str) -> StorageResult<Vec<f64>>
async fn save_dense_to_file( data: &DenseMatrix<f64>, path: &Path, ) -> StorageResult<()>
Provided Methods§
Sourcefn exists(path: &str) -> (bool, Option<PathBuf>)
fn exists(path: &str) -> (bool, Option<PathBuf>)
Returns true and the path to the metadata file if metadata file exists and is valid,
false otherwise.
This is used to avoid overwriting existing indexes.
Sourcefn path_to_uri(path: &Path) -> String
fn path_to_uri(path: &Path) -> String
Converts a full file path to a file:// URI for Lance.
Sourcefn validate_initialized(&self, md_path: &Path) -> StorageResult<()>
fn validate_initialized(&self, md_path: &Path) -> StorageResult<()>
Validates that the storage directory is properly initialized with metadata.
§Returns
Returns Ok(()) if metadata file exists, otherwise returns an error.
Sourcefn to_dense_record_batch(
&self,
matrix: &DenseMatrix<f64>,
) -> Result<RecordBatch, StorageError>
fn to_dense_record_batch( &self, matrix: &DenseMatrix<f64>, ) -> Result<RecordBatch, StorageError>
Converts a dense matrix to a RecordBatch in vector format (Lance-optimized). Each row of the matrix becomes a single FixedSizeList entry.
Arguments:
- matrix - Dense matrix to convert (N rows × F cols)
Returns:
RecordBatch with schema: `{ vector: FixedSizeList<Float64>[F] }` (one list entry per matrix row, F values each)
Sourcefn from_dense_record_batch(
&self,
batch: &RecordBatch,
) -> Result<DenseMatrix<f64>, StorageError>
fn from_dense_record_batch( &self, batch: &RecordBatch, ) -> Result<DenseMatrix<f64>, StorageError>
Reconstructs a dense matrix from a RecordBatch in vector format.
Arguments:
- `batch` - RecordBatch containing `FixedSizeList<Float64>` vectors
Returns: DenseMatrix in column-major format (smartcore convention)
Sourcefn to_sparse_record_batch(&self, m: &CsMat<f64>) -> StorageResult<RecordBatch>
fn to_sparse_record_batch(&self, m: &CsMat<f64>) -> StorageResult<RecordBatch>
Converts a sparse CSR matrix to a RecordBatch in columnar format.
Only non-zero entries are stored.
Sourcefn from_sparse_record_batch(
&self,
batch: RecordBatch,
expected_rows: usize,
expected_cols: usize,
) -> StorageResult<CsMat<f64>>
fn from_sparse_record_batch( &self, batch: RecordBatch, expected_rows: usize, expected_cols: usize, ) -> StorageResult<CsMat<f64>>
Reconstructs a sparse CSR matrix from a RecordBatch in columnar format.
- `batch` - RecordBatch containing (row, col, value) triplets
- `expected_rows` / `expected_cols` - dimensions taken from metadata
Sourceasync fn save_metadata(&self, metadata: &GeneMetadata) -> StorageResult<PathBuf>
async fn save_metadata(&self, metadata: &GeneMetadata) -> StorageResult<PathBuf>
Initializes storage by saving metadata. Must be called first.
Sourceasync fn load_metadata(&self) -> StorageResult<GeneMetadata>
async fn load_metadata(&self) -> StorageResult<GeneMetadata>
Loads metadata from storage.
Dyn Compatibility§
This trait is not dyn compatible.
In older versions of Rust, dyn compatibility was called "object safety", so this trait is not object safe.