use crate::datasets::make_gaussian_cliques_multi;
use genegraph_storage::lance_storage_graph::LanceStorageGraph;
use genegraph_storage::metadata::GeneMetadata;
use genegraph_storage::traits::backend::StorageBackend;
use genegraph_storage::traits::metadata::Metadata;
use log::{debug, info};
use smartcore::linalg::basic::arrays::Array2;
use smartcore::linalg::basic::matrix::DenseMatrix;
use std::path::PathBuf;
pub async fn cmd_generate(n_items: usize, n_dims: usize, seed: u64) -> anyhow::Result<()> {
let name_id = "javelin_test";
#[cfg(test)]
fn prepare_dir(n_id: &str) -> PathBuf {
let mut out_dir = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
out_dir.push("target");
out_dir.push("debug");
out_dir.push(n_id);
out_dir
}
#[cfg(not(test))]
fn prepare_dir(n_id: &str) -> PathBuf {
let mut out_dir = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
out_dir.push(n_id);
out_dir
}
let out_dir = prepare_dir(name_id);
if out_dir.exists() {
std::fs::remove_dir_all(&out_dir).unwrap();
}
let storage = LanceStorageGraph::new(
out_dir.to_str().expect("non-UTF8 test path").to_string(),
"javelin_test".to_string(),
);
let (dense, sparse, vector) = make_gaussian_cliques_multi(n_items, 0.3, 5, n_dims, seed);
let (nitems, nfeatures) = (dense.len(), dense[0].len());
GeneMetadata::seed_metadata(&name_id, nitems, nfeatures, &storage)
.await
.unwrap();
debug!("Saving metadata first to initialize storage directory");
let dense_matrix =
DenseMatrix::<f64>::from_iterator(dense.iter().flatten().map(|x| *x), nitems, nfeatures, 0);
storage
.save_dense("raw_input", &dense_matrix, &storage.metadata_path())
.await?;
let mut md: GeneMetadata = storage.load_metadata().await.unwrap();
let mock_info_adj = md.new_fileinfo(
"adjacency",
"sparse",
(nitems, nitems),
Some(sparse.nnz()),
None,
);
let mock_info_norms = md.new_fileinfo("norms", "vector", (nitems, 1), None, None);
md = md.add_file("adjacency", mock_info_adj);
md = md.add_file("norms", mock_info_norms);
storage
.save_sparse("adjacency", &sparse, &storage.metadata_path())
.await
.unwrap();
storage
.save_vector("norms", &vector.as_slice(), &storage.metadata_path())
.await
.unwrap();
storage.save_metadata(&md).await?;
println!(
"Generated example datasets in {:?}:
- dense Lance: {} rows × {} cols (raw_input)
- sparse Lance: (adjacency)
- 1D vector Lance: (norms)",
out_dir, nitems, nfeatures,
);
info!("Try now `javelin --filepath ./javelin_test`");
Ok(())
}