1use anyhow::{Context, Result};
2use serde::{Deserialize, Serialize};
3use std::collections::HashMap;
4use std::path::{Path, PathBuf};
5
6#[derive(Debug, Clone, Serialize, Deserialize)]
8pub struct IndexMeta {
9 pub version: String,
10 pub backend_name: String,
11 pub embedding_model: String,
12 pub dimensions: usize,
13 #[serde(default)]
14 pub backend_kwargs: HashMap<String, serde_json::Value>,
15 #[serde(default = "default_embedding_mode")]
16 pub embedding_mode: String,
17 #[serde(default)]
18 pub passage_sources: Vec<PassageSource>,
19 #[serde(default)]
20 pub embedding_options: HashMap<String, serde_json::Value>,
21 #[serde(default)]
23 pub is_compact: Option<bool>,
24 #[serde(default)]
26 pub is_pruned: Option<bool>,
27 #[serde(default)]
29 pub total_passages: Option<usize>,
30 #[serde(default)]
32 pub built_from_precomputed_embeddings: Option<bool>,
33 #[serde(default)]
34 pub embeddings_source: Option<String>,
35}
36
/// Serde fallback used for `IndexMeta::embedding_mode` when the key is absent.
fn default_embedding_mode() -> String {
    String::from("sentence-transformers")
}
40
41#[derive(Debug, Clone, Serialize, Deserialize)]
43pub struct PassageSource {
44 #[serde(rename = "type")]
45 pub source_type: String,
46 #[serde(default)]
47 pub path: String,
48 #[serde(default)]
49 pub index_path: String,
50 #[serde(default)]
51 pub path_relative: Option<String>,
52 #[serde(default)]
53 pub index_path_relative: Option<String>,
54}
55
56#[derive(Debug, Default, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
58#[serde(rename_all = "lowercase")]
59pub enum DistanceMetric {
60 #[default]
61 Mips,
62 L2,
63 Cosine,
64}
65
66impl DistanceMetric {
67 pub fn from_str_lossy(s: &str) -> Self {
68 match s.to_lowercase().as_str() {
69 "l2" => DistanceMetric::L2,
70 "cosine" => DistanceMetric::Cosine,
71 _ => DistanceMetric::Mips,
72 }
73 }
74}
75
76impl IndexMeta {
77 pub fn load(path: &Path) -> Result<Self> {
79 let content =
80 std::fs::read_to_string(path).with_context(|| format!("reading {}", path.display()))?;
81 let meta: IndexMeta = serde_json::from_str(&content)
82 .with_context(|| format!("parsing meta.json at {}", path.display()))?;
83 Ok(meta)
84 }
85
86 pub fn save(&self, path: &Path) -> Result<()> {
88 let content = serde_json::to_string_pretty(self)?;
89 std::fs::write(path, content)?;
90 Ok(())
91 }
92
93 pub fn distance_metric(&self) -> DistanceMetric {
95 self.backend_kwargs
96 .get("distance_metric")
97 .and_then(|v| v.as_str())
98 .map(DistanceMetric::from_str_lossy)
99 .unwrap_or_default()
100 }
101
102 pub fn requires_recompute(&self) -> bool {
104 if let Some(pruned) = self.is_pruned {
105 return pruned;
106 }
107 self.backend_kwargs
108 .get("is_recompute")
109 .and_then(|v| v.as_bool())
110 .unwrap_or(true)
111 }
112}
113
/// Locations of the files that belong to one index.
///
/// `IndexPaths::new` fills `base_dir` with the parent directory of the index
/// path and `index_name` with its final component.
#[derive(Debug, Clone)]
pub struct IndexPaths {
    /// Directory that the derived file paths are joined onto.
    pub base_dir: PathBuf,
    /// Final path component of the index path (e.g. `my_index.leann`).
    pub index_name: String,
}
119
120impl IndexPaths {
121 pub fn new(index_path: &Path) -> Self {
122 let base_dir = index_path.parent().unwrap_or(Path::new(".")).to_path_buf();
123 let index_name = index_path
124 .file_name()
125 .unwrap_or_default()
126 .to_string_lossy()
127 .to_string();
128 Self {
129 base_dir,
130 index_name,
131 }
132 }
133
134 pub fn meta_path(&self) -> PathBuf {
135 self.base_dir.join(format!("{}.meta.json", self.index_name))
136 }
137
138 pub fn passages_path(&self) -> PathBuf {
139 self.base_dir
140 .join(format!("{}.passages.jsonl", self.index_name))
141 }
142
143 pub fn offset_path(&self) -> PathBuf {
144 self.base_dir
145 .join(format!("{}.passages.idx", self.index_name))
146 }
147
148 pub fn index_file_path(&self) -> PathBuf {
149 let stem = self
151 .index_name
152 .strip_suffix(".leann")
153 .unwrap_or(&self.index_name);
154 self.base_dir.join(format!("{}.index", stem))
155 }
156
157 pub fn id_map_path(&self) -> PathBuf {
158 let stem = self
159 .index_name
160 .strip_suffix(".leann")
161 .unwrap_or(&self.index_name);
162 self.base_dir.join(format!("{}.ids.txt", stem))
163 }
164}
165
#[cfg(test)]
mod tests {
    use super::*;
    use std::io::Write;
    use tempfile::NamedTempFile;

    /// Builds a populated `IndexMeta` shared by several tests.
    fn sample_meta() -> IndexMeta {
        IndexMeta {
            version: "1.0".to_string(),
            backend_name: "hnsw".to_string(),
            embedding_model: "facebook/contriever".to_string(),
            dimensions: 768,
            backend_kwargs: HashMap::new(),
            embedding_mode: "sentence-transformers".to_string(),
            passage_sources: vec![],
            embedding_options: HashMap::new(),
            is_compact: Some(true),
            is_pruned: Some(true),
            total_passages: None,
            built_from_precomputed_embeddings: None,
            embeddings_source: None,
        }
    }

    #[test]
    fn test_index_meta_roundtrip() {
        let meta = sample_meta();

        let json = serde_json::to_string_pretty(&meta).unwrap();
        let deserialized: IndexMeta = serde_json::from_str(&json).unwrap();
        assert_eq!(deserialized.backend_name, "hnsw");
        assert_eq!(deserialized.dimensions, 768);
        assert_eq!(deserialized.embedding_model, "facebook/contriever");
        // The optional flags set above must survive serialization too.
        assert_eq!(deserialized.is_compact, Some(true));
        assert_eq!(deserialized.is_pruned, Some(true));
        assert_eq!(deserialized.total_passages, None);
    }

    #[test]
    fn test_index_meta_load() {
        let mut file = NamedTempFile::new().unwrap();
        write!(
            file,
            r#"{{
            "version": "1.0",
            "backend_name": "hnsw",
            "embedding_model": "test-model",
            "dimensions": 384,
            "passage_sources": []
        }}"#
        )
        .unwrap();

        let meta = IndexMeta::load(file.path()).unwrap();
        assert_eq!(meta.embedding_model, "test-model");
        assert_eq!(meta.dimensions, 384);
        assert_eq!(meta.embedding_mode, "sentence-transformers");
        // Keys absent from the JSON fall back to their defaults.
        assert!(meta.backend_kwargs.is_empty());
        assert!(meta.passage_sources.is_empty());
        assert_eq!(meta.is_pruned, None);
        assert_eq!(meta.distance_metric(), DistanceMetric::Mips);
        assert!(meta.requires_recompute());
    }

    #[test]
    fn test_index_meta_save_then_load() {
        let dir = tempfile::tempdir().unwrap();
        let path = dir.path().join("sample.meta.json");

        sample_meta().save(&path).unwrap();
        let loaded = IndexMeta::load(&path).unwrap();
        assert_eq!(loaded.version, "1.0");
        assert_eq!(loaded.backend_name, "hnsw");
        assert_eq!(loaded.is_compact, Some(true));

        // A path that was never written must surface as an error, not a panic.
        assert!(IndexMeta::load(&dir.path().join("missing.meta.json")).is_err());
    }

    #[test]
    fn test_distance_metric_parsing() {
        assert_eq!(DistanceMetric::from_str_lossy("mips"), DistanceMetric::Mips);
        assert_eq!(DistanceMetric::from_str_lossy("l2"), DistanceMetric::L2);
        assert_eq!(
            DistanceMetric::from_str_lossy("cosine"),
            DistanceMetric::Cosine
        );
        assert_eq!(
            DistanceMetric::from_str_lossy("unknown"),
            DistanceMetric::Mips
        );
        // Matching ignores letter case.
        assert_eq!(DistanceMetric::from_str_lossy("L2"), DistanceMetric::L2);
        assert_eq!(
            DistanceMetric::from_str_lossy("Cosine"),
            DistanceMetric::Cosine
        );
    }

    #[test]
    fn test_distance_metric_from_backend_kwargs() {
        let mut meta = sample_meta();
        assert_eq!(meta.distance_metric(), DistanceMetric::Mips);

        meta.backend_kwargs.insert(
            "distance_metric".to_string(),
            serde_json::Value::String("cosine".to_string()),
        );
        assert_eq!(meta.distance_metric(), DistanceMetric::Cosine);

        // A non-string value is ignored and the default metric is used.
        meta.backend_kwargs
            .insert("distance_metric".to_string(), serde_json::Value::Bool(true));
        assert_eq!(meta.distance_metric(), DistanceMetric::Mips);
    }

    #[test]
    fn test_requires_recompute_precedence() {
        let mut meta = sample_meta();
        meta.backend_kwargs
            .insert("is_recompute".to_string(), serde_json::Value::Bool(false));

        // An explicit `is_pruned` wins over `backend_kwargs["is_recompute"]`.
        meta.is_pruned = Some(true);
        assert!(meta.requires_recompute());
        meta.is_pruned = Some(false);
        assert!(!meta.requires_recompute());

        // Without `is_pruned`, the backend option decides.
        meta.is_pruned = None;
        assert!(!meta.requires_recompute());

        // With neither present, recompute is assumed.
        meta.backend_kwargs.clear();
        assert!(meta.requires_recompute());
    }

    #[test]
    fn test_index_paths() {
        let paths = IndexPaths::new(Path::new("/data/my_index.leann"));
        assert_eq!(paths.base_dir, Path::new("/data"));
        assert_eq!(paths.index_name, "my_index.leann");
        assert_eq!(
            paths.meta_path(),
            Path::new("/data/my_index.leann.meta.json")
        );
        assert_eq!(
            paths.passages_path(),
            Path::new("/data/my_index.leann.passages.jsonl")
        );
        assert_eq!(
            paths.offset_path(),
            Path::new("/data/my_index.leann.passages.idx")
        );
        assert_eq!(paths.index_file_path(), Path::new("/data/my_index.index"));
        assert_eq!(paths.id_map_path(), Path::new("/data/my_index.ids.txt"));
    }
}