1use anyhow::{Context, Result};
7use chrono::{DateTime, Utc};
8use serde::{Deserialize, Serialize};
9use std::collections::HashMap;
10use std::fs;
11use std::path::{Path, PathBuf};
12
13#[derive(Debug, Clone, Serialize, Deserialize)]
15pub struct IndexConfig {
16 pub index_path: PathBuf,
18 pub embedding_model: String,
20 pub embedding_dimensions: usize,
22 pub chunk_size: usize,
24 pub index_on_startup: bool,
26 pub watch_for_changes: bool,
28}
29
30impl Default for IndexConfig {
31 fn default() -> Self {
32 let default_path = dirs::home_dir()
33 .map(|p| p.join(".skill-engine").join("index"))
34 .unwrap_or_else(|| PathBuf::from(".skill-engine/index"));
35
36 Self {
37 index_path: default_path,
38 embedding_model: "all-minilm".to_string(),
39 embedding_dimensions: 384,
40 chunk_size: 32,
41 index_on_startup: true,
42 watch_for_changes: false,
43 }
44 }
45}
46
47impl IndexConfig {
48 pub fn with_path(path: impl Into<PathBuf>) -> Self {
50 Self {
51 index_path: path.into(),
52 ..Default::default()
53 }
54 }
55
56 pub fn with_model(mut self, model: impl Into<String>, dimensions: usize) -> Self {
58 self.embedding_model = model.into();
59 self.embedding_dimensions = dimensions;
60 self
61 }
62
63 pub fn with_chunk_size(mut self, size: usize) -> Self {
65 self.chunk_size = size;
66 self
67 }
68
69 pub fn no_startup_index(mut self) -> Self {
71 self.index_on_startup = false;
72 self
73 }
74}
75
76#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
78pub struct SkillChecksum {
79 pub skill_md_hash: String,
81 pub wasm_hash: Option<String>,
83 pub manifest_hash: Option<String>,
85 pub indexed_at: DateTime<Utc>,
87}
88
89#[derive(Debug, Clone, Serialize, Deserialize)]
91pub struct IndexMetadata {
92 pub version: u32,
94 pub embedding_model: String,
96 pub dimensions: usize,
98 pub created_at: DateTime<Utc>,
100 pub last_modified: DateTime<Utc>,
102 pub document_count: usize,
104 pub skill_checksums: HashMap<String, SkillChecksum>,
106}
107
108impl IndexMetadata {
109 const CURRENT_VERSION: u32 = 1;
110 const METADATA_FILE: &'static str = "index_metadata.json";
111
112 pub fn new(embedding_model: impl Into<String>, dimensions: usize) -> Self {
114 let now = Utc::now();
115 Self {
116 version: Self::CURRENT_VERSION,
117 embedding_model: embedding_model.into(),
118 dimensions,
119 created_at: now,
120 last_modified: now,
121 document_count: 0,
122 skill_checksums: HashMap::new(),
123 }
124 }
125
126 pub fn load(index_path: &Path) -> Result<Option<Self>> {
128 let metadata_path = index_path.join(Self::METADATA_FILE);
129 if !metadata_path.exists() {
130 return Ok(None);
131 }
132
133 let content = fs::read_to_string(&metadata_path)
134 .context("Failed to read index metadata")?;
135 let metadata: Self = serde_json::from_str(&content)
136 .context("Failed to parse index metadata")?;
137
138 Ok(Some(metadata))
139 }
140
141 pub fn save(&self, index_path: &Path) -> Result<()> {
143 fs::create_dir_all(index_path)
144 .context("Failed to create index directory")?;
145
146 let metadata_path = index_path.join(Self::METADATA_FILE);
147 let content = serde_json::to_string_pretty(self)
148 .context("Failed to serialize index metadata")?;
149 fs::write(&metadata_path, content)
150 .context("Failed to write index metadata")?;
151
152 Ok(())
153 }
154
155 pub fn is_compatible(&self, config: &IndexConfig) -> bool {
157 self.version == Self::CURRENT_VERSION &&
158 self.embedding_model == config.embedding_model &&
159 self.dimensions == config.embedding_dimensions
160 }
161
162 pub fn touch(&mut self) {
164 self.last_modified = Utc::now();
165 }
166}
167
/// Point-in-time summary of the index.
#[derive(Debug, Clone, Default)]
pub struct IndexStats {
    /// Number of skills that have a recorded checksum.
    pub total_skills: usize,

    /// Total number of indexed documents.
    pub total_documents: usize,

    /// Number of skills whose index entry is out of date.
    pub stale_skills: usize,

    /// Size of the index on disk, in bytes.
    pub index_size_bytes: u64,
}
180
/// Outcome of comparing the indexed skills against the skills currently
/// available.
#[derive(Debug, Clone, Default)]
pub struct SyncResult {
    /// Skills that are available but have no index entry yet.
    pub added: Vec<String>,

    /// Skills whose content differs from what was indexed.
    pub updated: Vec<String>,

    /// Skills that are indexed but no longer available.
    pub removed: Vec<String>,

    /// Number of skills that were already up to date.
    pub skipped: usize,

    /// Whether the whole index was (or has to be) rebuilt.
    pub full_reindex: bool,
}

impl SyncResult {
    /// Returns `true` when at least one skill was added, updated or removed.
    /// Skipped skills do not count as changes.
    pub fn has_changes(&self) -> bool {
        let nothing_changed =
            self.added.is_empty() && self.updated.is_empty() && self.removed.is_empty();
        !nothing_changed
    }

    /// Returns the number of skills looked at: added, updated, removed and
    /// skipped together.
    pub fn total_processed(&self) -> usize {
        let changed: usize = [&self.added, &self.updated, &self.removed]
            .iter()
            .map(|names| names.len())
            .sum();
        changed + self.skipped
    }
}
207
208pub struct IndexManager {
210 config: IndexConfig,
211 metadata: IndexMetadata,
212}
213
214impl IndexManager {
215 pub fn new(config: IndexConfig) -> Result<Self> {
217 let metadata = match IndexMetadata::load(&config.index_path)? {
219 Some(meta) if meta.is_compatible(&config) => meta,
220 _ => IndexMetadata::new(&config.embedding_model, config.embedding_dimensions),
221 };
222
223 Ok(Self { config, metadata })
224 }
225
226 pub fn config(&self) -> &IndexConfig {
228 &self.config
229 }
230
231 pub fn metadata(&self) -> &IndexMetadata {
233 &self.metadata
234 }
235
236 pub fn stats(&self) -> IndexStats {
238 let index_size_bytes = self.calculate_index_size();
239
240 IndexStats {
241 total_skills: self.metadata.skill_checksums.len(),
242 total_documents: self.metadata.document_count,
243 stale_skills: 0, index_size_bytes,
245 }
246 }
247
248 pub fn compute_skill_checksum(&self, skill_path: &Path) -> Result<SkillChecksum> {
250 let mut skill_md_hash = String::new();
251 let mut wasm_hash = None;
252 let mut manifest_hash = None;
253
254 let skill_md_path = skill_path.join("SKILL.md");
256 if skill_md_path.exists() {
257 let content = fs::read(&skill_md_path)
258 .context("Failed to read SKILL.md")?;
259 skill_md_hash = self.hash_content(&content);
260 }
261
262 for entry in fs::read_dir(skill_path).into_iter().flatten() {
264 if let Ok(entry) = entry {
265 if entry.path().extension().map_or(false, |e| e == "wasm") {
266 let content = fs::read(entry.path())
267 .context("Failed to read WASM file")?;
268 wasm_hash = Some(self.hash_content(&content));
269 break;
270 }
271 }
272 }
273
274 for filename in ["skill.toml", "skill.json"] {
276 let manifest_path = skill_path.join(filename);
277 if manifest_path.exists() {
278 let content = fs::read(&manifest_path)
279 .context("Failed to read manifest")?;
280 manifest_hash = Some(self.hash_content(&content));
281 break;
282 }
283 }
284
285 Ok(SkillChecksum {
286 skill_md_hash,
287 wasm_hash,
288 manifest_hash,
289 indexed_at: Utc::now(),
290 })
291 }
292
293 pub fn needs_reindex(&self, skill_name: &str, skill_path: &Path) -> Result<bool> {
295 let existing = match self.metadata.skill_checksums.get(skill_name) {
297 Some(checksum) => checksum,
298 None => return Ok(true), };
300
301 let current = self.compute_skill_checksum(skill_path)?;
303
304 Ok(existing.skill_md_hash != current.skill_md_hash ||
306 existing.wasm_hash != current.wasm_hash ||
307 existing.manifest_hash != current.manifest_hash)
308 }
309
310 pub fn record_indexed(&mut self, skill_name: &str, checksum: SkillChecksum, doc_count: usize) -> Result<()> {
312 self.metadata.skill_checksums.insert(skill_name.to_string(), checksum);
313 self.metadata.document_count = self.metadata.document_count.saturating_add(doc_count);
314 self.metadata.touch();
315 self.save_metadata()
316 }
317
318 pub fn record_removed(&mut self, skill_name: &str, doc_count: usize) -> Result<()> {
320 self.metadata.skill_checksums.remove(skill_name);
321 self.metadata.document_count = self.metadata.document_count.saturating_sub(doc_count);
322 self.metadata.touch();
323 self.save_metadata()
324 }
325
326 pub fn plan_sync(&self, current_skills: &HashMap<String, PathBuf>) -> Result<SyncResult> {
328 let mut result = SyncResult::default();
329
330 for (skill_name, skill_path) in current_skills {
332 if !self.metadata.skill_checksums.contains_key(skill_name) {
333 result.added.push(skill_name.clone());
334 } else if self.needs_reindex(skill_name, skill_path)? {
335 result.updated.push(skill_name.clone());
336 } else {
337 result.skipped += 1;
338 }
339 }
340
341 for skill_name in self.metadata.skill_checksums.keys() {
343 if !current_skills.contains_key(skill_name) {
344 result.removed.push(skill_name.clone());
345 }
346 }
347
348 Ok(result)
349 }
350
351 pub fn needs_full_reindex(&self, config: &IndexConfig) -> bool {
353 match IndexMetadata::load(&config.index_path) {
355 Ok(Some(meta)) => !meta.is_compatible(config),
356 Ok(None) => true, Err(_) => true, }
359 }
360
361 pub fn clear(&mut self) -> Result<()> {
363 self.metadata = IndexMetadata::new(&self.config.embedding_model, self.config.embedding_dimensions);
364 self.save_metadata()?;
365
366 let data_dir = self.config.index_path.join("data");
368 if data_dir.exists() {
369 fs::remove_dir_all(&data_dir)
370 .context("Failed to remove index data")?;
371 }
372
373 Ok(())
374 }
375
376 fn save_metadata(&self) -> Result<()> {
378 self.metadata.save(&self.config.index_path)
379 }
380
381 fn calculate_index_size(&self) -> u64 {
383 if !self.config.index_path.exists() {
384 return 0;
385 }
386
387 walkdir::WalkDir::new(&self.config.index_path)
388 .into_iter()
389 .filter_map(|e| e.ok())
390 .filter_map(|e| e.metadata().ok())
391 .map(|m| m.len())
392 .sum()
393 }
394
395 fn hash_content(&self, content: &[u8]) -> String {
397 use std::io::Write;
398 let mut hasher = blake3::Hasher::new();
399 hasher.write_all(content).expect("write to hasher");
400 hasher.finalize().to_hex().to_string()
401 }
402}
403
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::TempDir;

    /// Builds a config whose index lives inside a fresh temporary directory.
    /// The `TempDir` is returned so the directory outlives the test body.
    fn temp_config() -> (IndexConfig, TempDir) {
        let temp_dir = TempDir::new().unwrap();
        let config = IndexConfig::with_path(temp_dir.path().join("index"));
        (config, temp_dir)
    }

    #[test]
    fn test_config_default() {
        let config = IndexConfig::default();
        assert!(config.index_path.to_str().unwrap().contains(".skill-engine"));
        assert_eq!(config.embedding_model, "all-minilm");
        assert_eq!(config.embedding_dimensions, 384);
        assert_eq!(config.chunk_size, 32);
        assert!(config.index_on_startup);
        assert!(!config.watch_for_changes);
    }

    #[test]
    fn test_config_builder() {
        let config = IndexConfig::with_path("/tmp/test")
            .with_model("bge-small", 384)
            .with_chunk_size(64)
            .no_startup_index();

        assert_eq!(config.index_path, PathBuf::from("/tmp/test"));
        assert_eq!(config.embedding_model, "bge-small");
        assert_eq!(config.embedding_dimensions, 384);
        assert_eq!(config.chunk_size, 64);
        assert!(!config.index_on_startup);
    }

    #[test]
    fn test_metadata_new() {
        let meta = IndexMetadata::new("test-model", 384);
        assert_eq!(meta.version, IndexMetadata::CURRENT_VERSION);
        assert_eq!(meta.embedding_model, "test-model");
        assert_eq!(meta.dimensions, 384);
        assert_eq!(meta.document_count, 0);
        assert!(meta.skill_checksums.is_empty());
    }

    #[test]
    fn test_metadata_save_load() {
        let (config, _temp) = temp_config();

        // Nothing has been saved yet, so there is nothing to load.
        assert!(IndexMetadata::load(&config.index_path).unwrap().is_none());

        let mut meta = IndexMetadata::new(&config.embedding_model, config.embedding_dimensions);
        meta.document_count = 42;
        meta.skill_checksums.insert(
            "test-skill".to_string(),
            SkillChecksum {
                skill_md_hash: "abc123".to_string(),
                wasm_hash: Some("def456".to_string()),
                manifest_hash: None,
                indexed_at: Utc::now(),
            },
        );

        meta.save(&config.index_path).unwrap();
        let loaded = IndexMetadata::load(&config.index_path).unwrap().unwrap();

        assert_eq!(loaded.document_count, 42);
        assert!(loaded.skill_checksums.contains_key("test-skill"));
    }

    #[test]
    fn test_metadata_compatibility() {
        let config = IndexConfig::default();
        let meta = IndexMetadata::new(&config.embedding_model, config.embedding_dimensions);
        assert!(meta.is_compatible(&config));

        let mut incompatible_config = config.clone();
        incompatible_config.embedding_model = "different-model".to_string();
        assert!(!meta.is_compatible(&incompatible_config));
    }

    #[test]
    fn test_index_manager_creation() {
        let (config, _temp) = temp_config();
        let manager = IndexManager::new(config.clone()).unwrap();
        assert_eq!(manager.metadata().embedding_model, config.embedding_model);
    }

    #[test]
    fn test_skill_checksum() {
        let (config, temp) = temp_config();
        let manager = IndexManager::new(config).unwrap();

        // A skill with a SKILL.md and a manifest but no WASM module.
        let skill_dir = temp.path().join("test-skill");
        fs::create_dir_all(&skill_dir).unwrap();
        fs::write(skill_dir.join("SKILL.md"), "# Test Skill\n").unwrap();
        fs::write(skill_dir.join("skill.toml"), "name = \"test\"").unwrap();

        let checksum = manager.compute_skill_checksum(&skill_dir).unwrap();
        assert!(!checksum.skill_md_hash.is_empty());
        assert!(checksum.manifest_hash.is_some());
        assert!(checksum.wasm_hash.is_none());
    }

    #[test]
    fn test_needs_reindex() {
        let (config, temp) = temp_config();
        let mut manager = IndexManager::new(config).unwrap();

        let skill_dir = temp.path().join("test-skill");
        fs::create_dir_all(&skill_dir).unwrap();
        fs::write(skill_dir.join("SKILL.md"), "# Test Skill v1\n").unwrap();

        // Never indexed: must be indexed.
        assert!(manager.needs_reindex("test-skill", &skill_dir).unwrap());

        let checksum = manager.compute_skill_checksum(&skill_dir).unwrap();
        manager.record_indexed("test-skill", checksum, 5).unwrap();

        // Indexed and unchanged: nothing to do.
        assert!(!manager.needs_reindex("test-skill", &skill_dir).unwrap());

        fs::write(skill_dir.join("SKILL.md"), "# Test Skill v2\n").unwrap();

        // Content changed on disk: must be reindexed.
        assert!(manager.needs_reindex("test-skill", &skill_dir).unwrap());
    }

    #[test]
    fn test_plan_sync() {
        let (config, temp) = temp_config();
        let mut manager = IndexManager::new(config).unwrap();

        // Two skills are already indexed, both with a stale hash.
        let checksum = SkillChecksum {
            skill_md_hash: "old_hash".to_string(),
            wasm_hash: None,
            manifest_hash: None,
            indexed_at: Utc::now(),
        };
        manager.record_indexed("existing-skill", checksum.clone(), 3).unwrap();
        manager.record_indexed("removed-skill", checksum, 2).unwrap();

        // On disk there is the existing skill plus a brand-new one.
        let existing_skill_dir = temp.path().join("existing-skill");
        let new_skill_dir = temp.path().join("new-skill");
        fs::create_dir_all(&existing_skill_dir).unwrap();
        fs::create_dir_all(&new_skill_dir).unwrap();
        fs::write(existing_skill_dir.join("SKILL.md"), "# Existing\n").unwrap();
        fs::write(new_skill_dir.join("SKILL.md"), "# New\n").unwrap();

        let mut current_skills = HashMap::new();
        current_skills.insert("existing-skill".to_string(), existing_skill_dir);
        current_skills.insert("new-skill".to_string(), new_skill_dir);

        let result = manager.plan_sync(&current_skills).unwrap();

        assert!(result.added.contains(&"new-skill".to_string()));
        assert!(result.updated.contains(&"existing-skill".to_string()));
        assert!(result.removed.contains(&"removed-skill".to_string()));
        assert_eq!(result.skipped, 0);
        assert_eq!(result.total_processed(), 3);
    }

    #[test]
    fn test_sync_result() {
        let mut result = SyncResult::default();
        assert!(!result.has_changes());
        assert_eq!(result.total_processed(), 0);

        result.added.push("skill-1".to_string());
        result.skipped = 2;
        assert!(result.has_changes());
        assert_eq!(result.total_processed(), 3);
    }

    #[test]
    fn test_clear_index() {
        let (config, _temp) = temp_config();
        let mut manager = IndexManager::new(config).unwrap();

        let checksum = SkillChecksum {
            skill_md_hash: "hash".to_string(),
            wasm_hash: None,
            manifest_hash: None,
            indexed_at: Utc::now(),
        };
        manager.record_indexed("test-skill", checksum, 10).unwrap();
        assert!(!manager.metadata().skill_checksums.is_empty());

        manager.clear().unwrap();
        assert!(manager.metadata().skill_checksums.is_empty());
        assert_eq!(manager.metadata().document_count, 0);
    }
}