1#![allow(dead_code)]
2
3use log::{debug, info};
4use serde::{Deserialize, Serialize};
5use std::collections::HashMap;
6use std::fs;
7use std::path::PathBuf;
8
9use crate::StorageError;
10use crate::traits::StorageBackend;
11
/// Descriptor of a single stored data file, (de)serializable through serde.
///
/// Values are normally produced by `FileInfo::new`, which fills `storage_format`
/// from `filetype`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FileInfo {
    /// File name of the dataset; callers in this file build it as `<name_id>_<key>.lance`.
    pub filename: String,
    /// Category label; `FileInfo::which_format` recognises "dense", "sparse" and "vector".
    pub filetype: String,
    /// Storage layout label derived from `filetype` by `FileInfo::which_format`.
    pub storage_format: String,
    /// Row count (first component of the shape handed to `FileInfo::new`).
    pub rows: usize,
    /// Column count (second component of the shape handed to `FileInfo::new`).
    pub cols: usize,
    /// Optional `nnz` value — presumably the number of non-zero entries; TODO confirm.
    pub nnz: Option<usize>,
    /// Optional size in bytes — presumably the on-disk size of the file; TODO confirm.
    pub size_bytes: Option<u64>,
}
26
27impl FileInfo {
28 pub(crate) fn new(
30 filename: String,
31 filetype: &str,
32 data_shape: (usize, usize),
33 nnz: Option<usize>,
34 size_bytes: Option<u64>,
35 ) -> Self {
36 debug!(
37 "FileInfo::new: filename={}, filetype={}, shape={}x{}, nnz={:?}",
38 filename, filetype, data_shape.0, data_shape.1, nnz
39 );
40 Self {
41 filename,
42 filetype: filetype.into(),
43 storage_format: Self::which_format(filetype),
44 rows: data_shape.0,
45 cols: data_shape.1,
46 nnz,
47 size_bytes,
48 }
49 }
50
51 pub fn which_format(filetype: &str) -> String {
53 match filetype {
54 "dense" => String::from("lance fixed-row"),
55 "sparse" => String::from("lance row-major"),
56 "vector" => String::from("lance row-major"),
57 _ => panic!("filetype not recognised {}", filetype),
58 }
59 }
60
61 pub fn which_filetype(filetype: &str) -> String {
63 match filetype {
64 "rawinput" | "sub_centroids" => String::from("dense"),
65 "adjacency" | "laplacian" | "signals" => String::from("sparse"),
66 "lambdas" | "item_norms" | "norms" => String::from("vector"),
67 _ => panic!("key not recognised {}", filetype),
68 }
69 }
70}
71
/// Metadata record for one named dataset: its dimensions, base location and the
/// files registered for it. (De)serializable through serde.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct GeneMetadata {
    /// Dataset identifier; also used as the prefix of generated file names.
    pub name_id: String,
    /// Row count; starts at 0 and is set by `with_dimensions`.
    pub nrows: usize,
    /// Column count; starts at 0 and is set by `with_dimensions`.
    pub ncols: usize,
    /// Base path as a string (lossy UTF-8 conversion, see `with_base`); empty until set.
    pub base: String,
    /// Registered files, indexed by their key (e.g. "rawinput").
    pub files: HashMap<String, FileInfo>,
    /// RFC 3339 UTC timestamp taken when the record is created through `new`.
    pub created_at: String,
}
84
85impl GeneMetadata {
86 fn new(name_id: &str) -> Self {
89 info!("GeneMetadata::new: creating metadata for '{}'", name_id);
90 Self {
91 name_id: name_id.to_string(),
92 nrows: 0,
93 ncols: 0,
94 base: String::from(""),
95 files: HashMap::new(),
96 created_at: chrono::Utc::now().to_rfc3339(),
97 }
98 }
99
100 pub fn new_fileinfo(
101 &self,
102 key: &str,
103 filetype: &str,
104 data_shape: (usize, usize),
105 nnz: Option<usize>,
106 size_bytes: Option<u64>,
107 ) -> FileInfo {
108 FileInfo::new(
109 format!("{}_{}.lance", self.name_id, key),
110 filetype,
111 (data_shape.0, data_shape.1),
112 nnz,
113 size_bytes,
114 )
115 }
116
117 pub async fn read(path: PathBuf) -> Result<Self, StorageError> {
119 info!("Reading metadata from {:?}", path);
120 let s = fs::read_to_string(path).map_err(|e| StorageError::Io(e.to_string()))?;
121 let md: GeneMetadata = serde_json::from_str(&s).map_err(StorageError::Serde)?;
122 info!("Metadata read successfully");
123 Ok(md)
124 }
125
126 pub async fn seed_metadata<B: StorageBackend>(
128 name_id: &str,
129 nitems: usize,
130 nfeatures: usize,
131 storage: &B,
132 ) -> Result<GeneMetadata, StorageError> {
133 info!(
134 "GeneMetadata::seed_metadata: seeding metadata for '{}' with nitems={}, nfeatures={}",
135 name_id, nitems, nfeatures
136 );
137
138 let mut md = Self::new(name_id)
139 .with_base(storage.base_path())
140 .with_dimensions(nitems, nfeatures);
141
142 debug!("GeneMetadata::seed_metadata: registering files");
143
144 let (key, filetype, rows, cols, nnz) = ("rawinput", "dense", nitems, nfeatures, None);
145 debug!(
146 "SpaceMetadata::seed_metadata_eigen: adding file {} ({}x{}, nnz={:?})",
147 filetype, rows, cols, nnz
148 );
149 md = md.add_file(
150 key,
151 FileInfo::new(
152 format!("{}_{}.lance", name_id, key),
153 filetype,
154 (rows, cols),
155 nnz,
156 None,
157 ),
158 );
159
160 debug!("GeneMetadata::seed_metadata: saving metadata to storage");
161 storage.save_metadata(&md).await?;
162
163 info!(
164 "GeneMetadata::seed_metadata: metadata seeded successfully for '{}'",
165 name_id
166 );
167 Ok(md)
168 }
169
170 pub fn with_base(mut self, base_path: PathBuf) -> Self {
171 self.base = base_path.to_string_lossy().to_string();
172 self
173 }
174
175 pub fn with_dimensions(mut self, rows: usize, cols: usize) -> Self {
176 debug!(
177 "GeneMetadata::with_dimensions: setting dimensions to {}x{}",
178 rows, cols
179 );
180 self.nrows = rows;
181 self.ncols = cols;
182 self
183 }
184
185 pub fn add_file(mut self, key: &str, info: FileInfo) -> Self {
186 debug!(
187 "GeneMetadata::add_file: adding file '{}' ({})",
188 key, info.filename
189 );
190 self.files.insert(key.to_string(), info);
191 self
192 }
193}