1#![allow(dead_code)]
2
3use log::{debug, info};
4use serde::{Deserialize, Serialize};
5use std::collections::HashMap;
6use std::fs;
7use std::path::PathBuf;
8
9use crate::StorageError;
10use crate::traits::backend::StorageBackend;
11use crate::traits::metadata::Metadata;
12
/// Descriptor of a single stored file, as kept inside the metadata record.
///
/// The type derives serde's `Serialize`/`Deserialize`, so the field names below
/// are what appears in the serialized form.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FileInfo {
    /// File name. The constructors in this file build it as `<name_id>_<key>.lance`.
    pub filename: String,
    /// File type string as handed to [`FileInfo::new`]
    /// (`"dense"`, `"sparse"` or `"vector"` are the values `which_format` accepts).
    pub filetype: String,
    /// Storage format label derived from `filetype` by [`FileInfo::which_format`].
    pub storage_format: String,
    /// First component of the data shape passed to [`FileInfo::new`].
    pub rows: usize,
    /// Second component of the data shape passed to [`FileInfo::new`].
    pub cols: usize,
    /// Optional count supplied by the caller — presumably the number of
    /// non-zero entries for sparse data (TODO confirm).
    pub nnz: Option<usize>,
    /// Optional size in bytes supplied by the caller — presumably the on-disk
    /// size of the file (TODO confirm).
    pub size_bytes: Option<u64>,
}
27
28impl FileInfo {
29 pub fn new(
31 filename: String,
32 filetype: &str,
33 data_shape: (usize, usize),
34 nnz: Option<usize>,
35 size_bytes: Option<u64>,
36 ) -> Self {
37 debug!(
38 "FileInfo::new: filename={}, filetype={}, shape={}x{}, nnz={:?}",
39 filename, filetype, data_shape.0, data_shape.1, nnz
40 );
41 Self {
42 filename,
43 filetype: filetype.into(),
44 storage_format: Self::which_format(filetype),
45 rows: data_shape.0,
46 cols: data_shape.1,
47 nnz,
48 size_bytes,
49 }
50 }
51
52 pub fn which_format(filetype: &str) -> String {
54 match filetype {
55 "dense" => String::from("lance fixed-row"),
56 "sparse" => String::from("lance row-major"),
57 "vector" => String::from("lance row-major"),
58 _ => panic!("filetype not recognised {}", filetype),
59 }
60 }
61
62 pub fn which_filetype(filetype: &str) -> String {
64 match filetype {
65 "rawinput" | "sub_centroids" => String::from("dense"),
66 "adjacency" | "laplacian" | "signals" => String::from("sparse"),
67 "lambdas" | "item_norms" | "norms" => String::from("vector"),
68 _ => panic!("key not recognised {}", filetype),
69 }
70 }
71}
72
/// Metadata record for a named dataset together with the files registered for it.
///
/// The type derives serde's `Serialize`/`Deserialize`; [`GeneMetadata::read`]
/// loads it from a JSON file.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct GeneMetadata {
    /// Dataset identifier; also used as the prefix of generated file names.
    pub name_id: String,
    /// Number of rows; `0` until `with_dimensions` sets it.
    pub nrows: usize,
    /// Number of columns; `0` until `with_dimensions` sets it.
    pub ncols: usize,
    /// Base path as a string (lossy conversion of the path given to `with_base`);
    /// empty until then.
    pub base: String,
    /// Registered files, keyed by the key passed to `add_file` (e.g. `"rawinput"`).
    pub files: HashMap<String, FileInfo>,
    /// Creation time: the RFC 3339 UTC timestamp taken when `new` ran.
    pub created_at: String,
}
85
86impl GeneMetadata {
87 pub async fn read(path: PathBuf) -> Result<Self, StorageError> {
89 info!("Reading metadata from {:?}", path);
90 let s = fs::read_to_string(path).map_err(|e| StorageError::Io(e.to_string()))?;
91 let md: GeneMetadata = serde_json::from_str(&s).map_err(StorageError::Serde)?;
92 info!("Metadata read successfully");
93 Ok(md)
94 }
95}
96
97impl Metadata for GeneMetadata {
98 fn new(name_id: &str) -> Self {
101 info!("GeneMetadata::new: creating metadata for '{}'", name_id);
102 Self {
103 name_id: name_id.to_string(),
104 nrows: 0,
105 ncols: 0,
106 base: String::from(""),
107 files: HashMap::new(),
108 created_at: chrono::Utc::now().to_rfc3339(),
109 }
110 }
111
112 fn new_fileinfo(
113 &self,
114 key: &str,
115 filetype: &str,
116 data_shape: (usize, usize),
117 nnz: Option<usize>,
118 size_bytes: Option<u64>,
119 ) -> FileInfo {
120 FileInfo::new(
121 format!("{}_{}.lance", self.name_id, key),
122 filetype,
123 (data_shape.0, data_shape.1),
124 nnz,
125 size_bytes,
126 )
127 }
128
129 async fn seed_metadata<B: StorageBackend>(
131 name_id: &str,
132 nitems: usize,
133 nfeatures: usize,
134 storage: &B,
135 ) -> Result<GeneMetadata, StorageError> {
136 info!(
137 "GeneMetadata::seed_metadata: seeding metadata for '{}' with nitems={}, nfeatures={}",
138 name_id, nitems, nfeatures
139 );
140
141 let mut md = Self::new(name_id)
142 .with_base(storage.base_path())
143 .with_dimensions(nitems, nfeatures);
144
145 debug!("GeneMetadata::seed_metadata: registering files");
146
147 let (key, filetype, rows, cols, nnz) = ("rawinput", "dense", nitems, nfeatures, None);
148 debug!(
149 "SpaceMetadata::seed_metadata_eigen: adding file {} ({}x{}, nnz={:?})",
150 filetype, rows, cols, nnz
151 );
152 md = md.add_file(
153 key,
154 FileInfo::new(
155 format!("{}_{}.lance", name_id, key),
156 filetype,
157 (rows, cols),
158 nnz,
159 None,
160 ),
161 );
162
163 debug!("GeneMetadata::seed_metadata: saving metadata to storage");
164 storage.save_metadata(&md).await?;
165
166 info!(
167 "GeneMetadata::seed_metadata: metadata seeded successfully for '{}'",
168 name_id
169 );
170 Ok(md)
171 }
172
173 fn with_base(mut self, base_path: PathBuf) -> Self {
174 self.base = base_path.to_string_lossy().to_string();
175 self
176 }
177
178 fn with_dimensions(mut self, rows: usize, cols: usize) -> Self {
179 debug!(
180 "GeneMetadata::with_dimensions: setting dimensions to {}x{}",
181 rows, cols
182 );
183 self.nrows = rows;
184 self.ncols = cols;
185 self
186 }
187
188 fn add_file(mut self, key: &str, info: FileInfo) -> Self {
189 debug!(
190 "GeneMetadata::add_file: adding file '{}' ({})",
191 key, info.filename
192 );
193 self.files.insert(key.to_string(), info);
194 self
195 }
196}