1#![allow(dead_code)]
2
3use log::{debug, info};
4use serde::{Deserialize, Serialize};
5use std::collections::HashMap;
6use std::fs;
7use std::path::PathBuf;
8
9use crate::StorageError;
10use crate::traits::backend::StorageBackend;
11use crate::traits::metadata::Metadata;
12
13#[derive(Debug, Clone, Serialize, Deserialize)]
15pub struct FileInfo {
16 pub filename: String,
18 pub filetype: String,
20 pub storage_format: String,
22 pub rows: usize,
23 pub cols: usize,
24 pub nnz: Option<usize>,
25 pub size_bytes: Option<u64>,
26}
27
28impl FileInfo {
29 pub fn new(
31 filename: String,
32 filetype: &str,
33 data_shape: (usize, usize),
34 nnz: Option<usize>,
35 size_bytes: Option<u64>,
36 ) -> Self {
37 debug!(
38 "FileInfo::new: filename={}, filetype={}, shape={}x{}, nnz={:?}",
39 filename, filetype, data_shape.0, data_shape.1, nnz
40 );
41 Self {
42 filename,
43 filetype: filetype.into(),
44 storage_format: Self::which_format(filetype),
45 rows: data_shape.0,
46 cols: data_shape.1,
47 nnz,
48 size_bytes,
49 }
50 }
51
52 pub fn which_format(filetype: &str) -> String {
54 match filetype {
55 "dense" => String::from("lance fixed-row"),
56 "sparse" => String::from("lance row-major"),
57 "vector" => String::from("lance row-major"),
58 _ => panic!("filetype not recognised {}", filetype),
59 }
60 }
61
62 pub fn which_filetype(filetype: &str) -> String {
64 match filetype {
65 "rawinput" | "sub_centroids" | "dense" => String::from("dense"),
66 "adjacency" | "laplacian" | "signals" | "sparse" => String::from("sparse"),
67 "lambdas" | "item_norms" | "norms" | "vector" => String::from("vector"),
68 _ => panic!(
69 "Wrong filetype: use specific types or generic ('dense', 'sparse', 'vector')"
70 ),
71 }
72 }
73}
74
75#[derive(Debug, Clone, Serialize, Deserialize)]
79pub struct GeneMetadata {
80 pub name_id: String,
81 pub nrows: usize,
82 pub ncols: usize,
83 pub base: String,
84 pub files: HashMap<String, FileInfo>,
85 pub created_at: String,
86}
87
88impl GeneMetadata {
89 pub async fn read(path: PathBuf) -> Result<Self, StorageError> {
91 info!("Reading metadata from {:?}", path);
92 let s = fs::read_to_string(path).map_err(|e| StorageError::Io(e.to_string()))?;
93 let md: GeneMetadata = serde_json::from_str(&s).map_err(StorageError::Serde)?;
94 info!("Metadata read successfully");
95 Ok(md)
96 }
97}
98
99impl Metadata for GeneMetadata {
100 fn new(name_id: &str) -> Self {
103 info!("GeneMetadata::new: creating metadata for '{}'", name_id);
104 Self {
105 name_id: name_id.to_string(),
106 nrows: 0,
107 ncols: 0,
108 base: String::from(""),
109 files: HashMap::new(),
110 created_at: chrono::Utc::now().to_rfc3339(),
111 }
112 }
113
114 fn new_fileinfo(
115 &self,
116 key: &str,
117 filetype: &str,
118 data_shape: (usize, usize),
119 nnz: Option<usize>,
120 size_bytes: Option<u64>,
121 ) -> FileInfo {
122 FileInfo::new(
123 format!("{}_{}.lance", self.name_id, key),
124 filetype,
125 (data_shape.0, data_shape.1),
126 nnz,
127 size_bytes,
128 )
129 }
130
131 async fn seed_metadata<B: StorageBackend>(
133 name_id: &str,
134 nitems: usize,
135 nfeatures: usize,
136 storage: &B,
137 ) -> Result<GeneMetadata, StorageError> {
138 info!(
139 "GeneMetadata::seed_metadata: seeding metadata for '{}' with nitems={}, nfeatures={}",
140 name_id, nitems, nfeatures
141 );
142
143 let md = Self::new(name_id)
144 .with_base(storage.base_path())
145 .with_dimensions(nitems, nfeatures);
146
147 debug!("GeneMetadata::seed_metadata: saving metadata to storage");
148 storage.save_metadata(&md).await?;
149
150 info!(
151 "GeneMetadata::seed_metadata: metadata seeded successfully for '{}'",
152 name_id
153 );
154 Ok(md)
155 }
156
157 fn with_base(mut self, base_path: PathBuf) -> Self {
158 self.base = base_path.to_string_lossy().to_string();
159 self
160 }
161
162 fn with_dimensions(mut self, rows: usize, cols: usize) -> Self {
163 debug!(
164 "GeneMetadata::with_dimensions: setting dimensions to {}x{}",
165 rows, cols
166 );
167 self.nrows = rows;
168 self.ncols = cols;
169 self
170 }
171
172 fn add_file(mut self, key: &str, info: FileInfo) -> Self {
173 debug!(
174 "GeneMetadata::add_file: adding file '{}' ({})",
175 key, info.filename
176 );
177 self.files.insert(key.to_string(), info);
178 self
179 }
180}