use crate::embed::{EmbedKind, Embedder};
use crate::skill::Skill;
use serde::{Deserialize, Serialize};
use std::fs;
use std::path::Path;
10
11#[derive(Clone, Debug, Serialize, Deserialize)]
12pub struct Entry {
13 pub id: String,
14 pub name: String,
15 pub description: String,
16 pub path: String,
17 pub keywords: Vec<String>,
18 #[serde(default)]
21 pub trigger_phrases: Vec<String>,
22 #[serde(default)]
28 pub body_head: String,
29 pub hash: String,
30 pub embedding: Vec<f32>,
31}
32
33impl Entry {
34 pub fn doc_text(&self) -> String {
38 if self.body_head.is_empty() {
39 self.description.clone()
40 } else {
41 format!("{}\n{}", self.description, self.body_head)
42 }
43 }
44}
45
46#[derive(Clone, Debug, Default, Serialize, Deserialize)]
47pub struct Index {
48 pub model: String,
49 pub dim: usize,
50 pub skills: Vec<Entry>,
51}
52
53impl Index {
54 pub fn get(&self, id: &str) -> Option<&Entry> {
55 self.skills.iter().find(|e| e.id == id)
56 }
57
58 pub fn by_path(&self, path: &Path) -> Option<&Entry> {
63 let raw = path.to_string_lossy();
64 if let Some(e) = self.skills.iter().find(|e| e.path == raw) {
65 return Some(e);
66 }
67 let want = fs::canonicalize(path).ok()?;
68 self.skills
69 .iter()
70 .find(|e| fs::canonicalize(&e.path).ok().as_deref() == Some(want.as_path()))
71 }
72
73 pub fn load(path: &Path) -> anyhow::Result<Option<Index>> {
74 if !path.exists() {
75 return Ok(None);
76 }
77 let data = fs::read_to_string(path)?;
78 Ok(Some(serde_json::from_str(&data)?))
79 }
80
81 pub fn save(&self, path: &Path) -> anyhow::Result<()> {
87 if let Some(parent) = path.parent() {
88 fs::create_dir_all(parent)?;
89 }
90 let json = serde_json::to_string_pretty(self)?;
91 let tmp = path.with_extension(format!("tmp.{}", std::process::id()));
92 fs::write(&tmp, json)?;
93 if let Err(e) = fs::rename(&tmp, path) {
94 let _ = fs::remove_file(&tmp);
95 return Err(e.into());
96 }
97 Ok(())
98 }
99}
100
101pub fn build(
105 skills: &[Skill],
106 embedder: &dyn Embedder,
107 prev: Option<&Index>,
108) -> anyhow::Result<Index> {
109 let model = embedder.id();
110 let mut entries: Vec<Option<Entry>> = vec![None; skills.len()];
111 let mut to_embed: Vec<usize> = Vec::new();
112
113 for (i, s) in skills.iter().enumerate() {
114 let reuse = prev
115 .filter(|p| p.model == model)
116 .and_then(|p| p.get(&s.id))
117 .filter(|e| e.hash == s.hash)
118 .cloned();
119 match reuse {
120 Some(mut e) => {
126 e.keywords = s.keywords.clone();
127 e.trigger_phrases = s.trigger_phrases.clone();
128 e.body_head = s.body_head.clone();
129 entries[i] = Some(e);
130 }
131 None => to_embed.push(i),
132 }
133 }
134
135 if !to_embed.is_empty() {
136 let texts: Vec<String> = to_embed
137 .iter()
138 .map(|&i| skills[i].description.clone())
139 .collect();
140 let embs = embedder.embed(&texts, EmbedKind::Document)?;
141 for (k, &i) in to_embed.iter().enumerate() {
142 let s = &skills[i];
143 entries[i] = Some(Entry {
144 id: s.id.clone(),
145 name: s.name.clone(),
146 description: s.description.clone(),
147 path: s.path.display().to_string(),
148 keywords: s.keywords.clone(),
149 trigger_phrases: s.trigger_phrases.clone(),
150 body_head: s.body_head.clone(),
151 hash: s.hash.clone(),
152 embedding: embs[k].clone(),
153 });
154 }
155 }
156
157 let skills: Vec<Entry> = entries.into_iter().flatten().collect();
158 let dim = skills.first().map(|e| e.embedding.len()).unwrap_or(0);
159 Ok(Index { model, dim, skills })
160}
161
#[cfg(test)]
mod tests {
    use super::*;
    use crate::skill::Skill;
    use std::sync::atomic::{AtomicUsize, Ordering};

    /// Embedder stub that counts how many texts it has been asked to embed.
    struct CountingEmbedder(AtomicUsize);

    impl Embedder for CountingEmbedder {
        fn id(&self) -> String {
            "counting".into()
        }

        fn embed(&self, texts: &[String], _: EmbedKind) -> anyhow::Result<Vec<Vec<f32>>> {
            self.0.fetch_add(texts.len(), Ordering::SeqCst);
            Ok(texts.iter().map(|_| vec![1.0, 0.0]).collect())
        }
    }

    /// Minimal skill with the given id and hash; all other fields are derived or empty.
    fn skill(id: &str, hash: &str) -> Skill {
        Skill {
            id: id.to_string(),
            name: id.to_string(),
            description: format!("does {id}"),
            body_head: String::new(),
            keywords: Vec::new(),
            trigger_phrases: Vec::new(),
            path: std::path::PathBuf::from(format!("/s/{id}/SKILL.md")),
            hash: hash.to_string(),
        }
    }

    /// Minimal index entry with the given id and stored path; everything else is empty.
    fn entry(id: &str, path: &str) -> Entry {
        Entry {
            id: id.to_string(),
            name: id.to_string(),
            description: String::new(),
            path: path.to_string(),
            keywords: Vec::new(),
            trigger_phrases: Vec::new(),
            body_head: String::new(),
            hash: String::new(),
            embedding: Vec::new(),
        }
    }

    #[test]
    fn entry_doc_text_appends_body_head_when_present() {
        let mut e = entry("a", "/s/a/SKILL.md");
        e.description = "Edit Word documents.".into();
        assert_eq!(e.doc_text(), "Edit Word documents.");

        e.body_head = "Insert tables and a table of contents.".into();
        assert_eq!(
            e.doc_text(),
            "Edit Word documents.\nInsert tables and a table of contents."
        );
    }

    #[test]
    fn build_persists_and_refreshes_body_head() {
        let mut s = skill("a", "h1");
        s.body_head = "first body line".into();
        let e = CountingEmbedder(AtomicUsize::new(0));
        let idx = build(std::slice::from_ref(&s), &e, None).unwrap();
        assert_eq!(idx.get("a").unwrap().body_head, "first body line");

        // Simulate a previous index whose entry has no body_head stored.
        let mut stale = idx.clone();
        stale.skills[0].body_head.clear();
        let refreshed = build(std::slice::from_ref(&s), &e, Some(&stale)).unwrap();
        assert_eq!(e.0.load(Ordering::SeqCst), 1, "reuse must not re-embed");
        assert_eq!(refreshed.get("a").unwrap().body_head, "first body line");
    }

    #[test]
    fn body_head_absent_index_still_deserializes() {
        // Neither `body_head` nor `trigger_phrases` is present in this document.
        let json = r#"{"model":"m","dim":2,"skills":[{"id":"a","name":"a",
            "description":"d","path":"/s/a/SKILL.md","keywords":[],"hash":"h",
            "embedding":[1.0,0.0]}]}"#;
        let idx: Index = serde_json::from_str(json).unwrap();
        assert_eq!(idx.get("a").unwrap().body_head, "");
        assert_eq!(idx.get("a").unwrap().doc_text(), "d");
    }

    #[test]
    fn rebuild_with_prev_reuses_unchanged_embeddings() {
        let skills = vec![skill("a", "h1"), skill("b", "h2")];
        let e = CountingEmbedder(AtomicUsize::new(0));
        let first = build(&skills, &e, None).unwrap();
        assert_eq!(e.0.load(Ordering::SeqCst), 2);

        let again = build(&skills, &e, Some(&first)).unwrap();
        assert_eq!(
            e.0.load(Ordering::SeqCst),
            2,
            "unchanged skills re-embedded"
        );
        assert_eq!(again.skills.len(), 2);

        let changed = vec![skill("a", "h1-new"), skill("b", "h2")];
        let _ = build(&changed, &e, Some(&first)).unwrap();
        assert_eq!(
            e.0.load(Ordering::SeqCst),
            3,
            "expected exactly one re-embed"
        );
    }

    #[test]
    fn save_is_atomic_and_leaves_no_temp() {
        let dir = std::env::temp_dir().join(format!("ski-index-save-{}", std::process::id()));
        // Start clean: an earlier crashed run that reused this PID may have left files
        // behind, which would make the leftover check below fail spuriously.
        let _ = fs::remove_dir_all(&dir);
        let path = dir.join("index.json");
        let idx = Index {
            model: "m".into(),
            dim: 2,
            skills: vec![entry("a", "/s/a/SKILL.md")],
        };
        idx.save(&path).unwrap();
        let back = Index::load(&path).unwrap().unwrap();
        assert_eq!(back.skills[0].id, "a");
        let leftovers: Vec<_> = fs::read_dir(&dir)
            .unwrap()
            .filter_map(|e| e.ok())
            .map(|e| e.file_name())
            .filter(|n| n != "index.json")
            .collect();
        assert!(leftovers.is_empty(), "temp file left behind: {leftovers:?}");
        let _ = fs::remove_dir_all(&dir);
    }

    #[test]
    fn by_path_matches_stored_string() {
        let idx = Index {
            model: "m".into(),
            dim: 0,
            skills: vec![
                entry("pdf", "/skills/pdf/SKILL.md"),
                entry("xlsx", "/skills/xlsx/SKILL.md"),
            ],
        };
        assert_eq!(
            idx.by_path(Path::new("/skills/xlsx/SKILL.md")).unwrap().id,
            "xlsx"
        );
        assert!(idx.by_path(Path::new("/skills/none/SKILL.md")).is_none());
    }
}