Skip to main content

fastskill_core/core/
vector_index.rs

1//! Vector index service for storing and searching skill embeddings
2
3use crate::core::service::ServiceError;
4use async_trait::async_trait;
5use serde::{Deserialize, Serialize};
6use std::path::PathBuf;
7
/// A skill stored in the vector index.
///
/// The SQLite-backed implementation in this file persists one value of this type per row
/// of its `skills` table and rebuilds it when rows are read back.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct IndexedSkill {
    /// Unique skill identifier (the primary key of the `skills` table).
    pub id: String,
    /// Path to the skill directory.
    pub skill_path: PathBuf,
    /// Frontmatter data as JSON.
    pub frontmatter_json: serde_json::Value,
    /// Vector embedding that similarity search compares against the query embedding.
    pub embedding: Vec<f32>,
    /// Hash of the skill's `SKILL.md` file (described as SHA256). This module stores the
    /// value it is given verbatim; it never computes or verifies the hash itself.
    pub file_hash: String,
    /// Last updated timestamp (UTC). The SQLite implementation sets it to the current time
    /// on every add-or-update.
    pub updated_at: chrono::DateTime<chrono::Utc>,
}
24
/// Search result: an indexed skill paired with its similarity to the query.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SkillMatch {
    /// The indexed skill
    pub skill: IndexedSkill,
    /// Similarity score, higher is more similar.
    ///
    /// The SQLite implementation in this file fills this with the cosine similarity of the
    /// query and stored embeddings, so the value lies in `-1.0..=1.0` (it can be negative
    /// when embeddings have negative components). `0.0` is also used when the two
    /// embeddings have different lengths or either has zero magnitude.
    pub similarity: f32,
}
33
/// Vector index service trait.
///
/// Abstracts a store of [`IndexedSkill`] records keyed by skill id that can be queried by
/// embedding similarity. Implementations must be `Send + Sync`; every operation is async and
/// reports failures as [`ServiceError`].
#[async_trait]
pub trait VectorIndexService: Send + Sync {
    /// Add or update a skill in the index.
    ///
    /// * `skill_id` - identifier the record is stored under.
    /// * `skill_path` - path to the skill directory.
    /// * `frontmatter_json` - the skill's frontmatter as JSON.
    /// * `embedding` - vector embedding for the skill.
    /// * `file_hash` - hash of the skill's `SKILL.md` file.
    ///
    /// The record's `updated_at` is not a parameter; the SQLite implementation in this file
    /// stamps it with the current UTC time.
    async fn add_or_update_skill(
        &self,
        skill_id: &str,
        skill_path: PathBuf,
        frontmatter_json: serde_json::Value,
        embedding: Vec<f32>,
        file_hash: &str,
    ) -> Result<(), ServiceError>;

    /// Search for similar skills using vector similarity.
    ///
    /// Returns up to `limit` matches for `query_embedding`. The SQLite implementation in
    /// this file orders them from most to least similar.
    async fn search_similar(
        &self,
        query_embedding: &[f32],
        limit: usize,
    ) -> Result<Vec<SkillMatch>, ServiceError>;

    /// Get a skill by ID.
    ///
    /// The `Option` in the return type lets a missing skill be reported as `Ok(None)`
    /// rather than as an error; the SQLite implementation in this file does exactly that.
    async fn get_skill_by_id(&self, skill_id: &str) -> Result<Option<IndexedSkill>, ServiceError>;

    /// Remove a skill from the index.
    ///
    /// NOTE(review): the trait does not say whether removing an unknown id is an error; the
    /// SQLite implementation in this file treats it as a successful no-op.
    async fn remove_skill(&self, skill_id: &str) -> Result<(), ServiceError>;

    /// Get all skills in the index.
    async fn get_all_skills(&self) -> Result<Vec<IndexedSkill>, ServiceError>;
}
63
/// SQLite-based vector index service implementation.
///
/// The struct holds only the database location. No connection is kept open: each operation
/// in this file opens its own `rusqlite::Connection` on a blocking task.
pub struct VectorIndexServiceImpl {
    /// Path to the SQLite database file
    db_path: PathBuf,
}
69
70impl VectorIndexServiceImpl {
71    /// Create a new vector index service
72    pub fn new(db_path: PathBuf) -> Self {
73        Self { db_path }
74    }
75
76    /// Create a new service with default index path
77    pub fn with_default_path(skill_dir: &std::path::Path) -> Self {
78        let index_path = skill_dir.join(".fastskill").join("index.db");
79        Self::new(index_path)
80    }
81
82    /// Create a new service with configured index path
83    pub fn with_config(
84        config: &crate::core::service::EmbeddingConfig,
85        skill_dir: &std::path::Path,
86    ) -> Self {
87        let index_path = config
88            .index_path
89            .clone()
90            .unwrap_or_else(|| skill_dir.join(".fastskill").join("index.db"));
91        Self::new(index_path)
92    }
93
94    /// Ensure the database schema is created
95    async fn ensure_schema(&self) -> Result<(), ServiceError> {
96        let db_path = self.db_path.clone();
97
98        // Run database operations in a blocking task
99        tokio::task::spawn_blocking(move || {
100            // Create parent directory if it doesn't exist
101            if let Some(parent) = db_path.parent() {
102                std::fs::create_dir_all(parent).map_err(|e| {
103                    ServiceError::Custom(format!("Failed to create database directory: {}", e))
104                })?;
105            }
106
107            let conn = rusqlite::Connection::open(&db_path)
108                .map_err(|e| ServiceError::Custom(format!("Failed to open database: {}", e)))?;
109
110            conn.execute(
111                "CREATE TABLE IF NOT EXISTS skills (
112                    id TEXT PRIMARY KEY,
113                    skill_path TEXT NOT NULL,
114                    frontmatter_json TEXT NOT NULL,
115                    embedding_json TEXT NOT NULL,
116                    file_hash TEXT NOT NULL,
117                    updated_at TEXT NOT NULL
118                )",
119                [],
120            )
121            .map_err(|e| ServiceError::Custom(format!("Failed to create schema: {}", e)))?;
122
123            // Create index for faster lookups
124            conn.execute(
125                "CREATE INDEX IF NOT EXISTS idx_updated_at ON skills(updated_at)",
126                [],
127            )
128            .map_err(|e| ServiceError::Custom(format!("Failed to create index: {}", e)))?;
129
130            Ok(())
131        })
132        .await
133        .map_err(|e| ServiceError::Custom(format!("Database task failed: {}", e)))?
134    }
135
136    /// Calculate cosine similarity between two vectors
137    fn cosine_similarity(a: &[f32], b: &[f32]) -> f32 {
138        if a.len() != b.len() {
139            return 0.0;
140        }
141
142        let mut dot_product = 0.0;
143        let mut norm_a = 0.0;
144        let mut norm_b = 0.0;
145
146        for i in 0..a.len() {
147            dot_product += a[i] * b[i];
148            norm_a += a[i] * a[i];
149            norm_b += b[i] * b[i];
150        }
151
152        let norm_a = norm_a.sqrt();
153        let norm_b = norm_b.sqrt();
154
155        if norm_a == 0.0 || norm_b == 0.0 {
156            0.0
157        } else {
158            dot_product / (norm_a * norm_b)
159        }
160    }
161}
162
163#[async_trait]
164impl VectorIndexService for VectorIndexServiceImpl {
165    async fn add_or_update_skill(
166        &self,
167        skill_id: &str,
168        skill_path: PathBuf,
169        frontmatter_json: serde_json::Value,
170        embedding: Vec<f32>,
171        file_hash: &str,
172    ) -> Result<(), ServiceError> {
173        self.ensure_schema().await?;
174
175        let db_path = self.db_path.clone();
176        let skill_id = skill_id.to_string();
177        let skill_path_str = skill_path.to_string_lossy().to_string();
178        let frontmatter_str = serde_json::to_string(&frontmatter_json)
179            .map_err(|e| ServiceError::Custom(format!("Failed to serialize frontmatter: {}", e)))?;
180        let embedding_str = serde_json::to_string(&embedding)
181            .map_err(|e| ServiceError::Custom(format!("Failed to serialize embedding: {}", e)))?;
182        let file_hash = file_hash.to_string();
183        let updated_at = chrono::Utc::now().to_rfc3339();
184
185        tokio::task::spawn_blocking(move || {
186            let conn = rusqlite::Connection::open(&db_path)
187                .map_err(|e| ServiceError::Custom(format!("Failed to open database: {}", e)))?;
188
189            conn.execute(
190                "INSERT OR REPLACE INTO skills (id, skill_path, frontmatter_json, embedding_json, file_hash, updated_at)
191                 VALUES (?, ?, ?, ?, ?, ?)",
192                rusqlite::params![
193                    skill_id,
194                    skill_path_str,
195                    frontmatter_str,
196                    embedding_str,
197                    file_hash,
198                    updated_at
199                ],
200            )
201            .map_err(|e| ServiceError::Custom(format!("Failed to insert skill: {}", e)))?;
202
203            Ok(())
204        })
205        .await
206        .map_err(|e| ServiceError::Custom(format!("Database task failed: {}", e)))?
207    }
208
209    async fn search_similar(
210        &self,
211        query_embedding: &[f32],
212        limit: usize,
213    ) -> Result<Vec<SkillMatch>, ServiceError> {
214        self.ensure_schema().await?;
215
216        let db_path = self.db_path.clone();
217        let query_embedding = query_embedding.to_vec();
218
219        let skills = tokio::task::spawn_blocking(move || {
220            let conn = rusqlite::Connection::open(&db_path)
221                .map_err(|e| ServiceError::Custom(format!("Failed to open database: {}", e)))?;
222
223            let mut stmt = conn
224                .prepare("SELECT id, skill_path, frontmatter_json, embedding_json, file_hash, updated_at FROM skills")
225                .map_err(|e| ServiceError::Custom(format!("Failed to prepare query: {}", e)))?;
226
227            let skill_iter = stmt.query_map([], |row| {
228                let id: String = row.get(0)?;
229                let skill_path: String = row.get(1)?;
230                let frontmatter_str: String = row.get(2)?;
231                let embedding_str: String = row.get(3)?;
232                let file_hash: String = row.get(4)?;
233                let updated_at_str: String = row.get(5)?;
234
235                let frontmatter_json: serde_json::Value = serde_json::from_str(&frontmatter_str)
236                    .map_err(|e| rusqlite::Error::FromSqlConversionFailure(0, rusqlite::types::Type::Text, Box::new(e)))?;
237                let embedding: Vec<f32> = serde_json::from_str(&embedding_str)
238                    .map_err(|e| rusqlite::Error::FromSqlConversionFailure(0, rusqlite::types::Type::Text, Box::new(e)))?;
239                let updated_at = chrono::DateTime::parse_from_rfc3339(&updated_at_str)
240                    .map_err(|e| rusqlite::Error::FromSqlConversionFailure(0, rusqlite::types::Type::Text, Box::new(e)))?
241                    .with_timezone(&chrono::Utc);
242
243                Ok(IndexedSkill {
244                    id,
245                    skill_path: PathBuf::from(skill_path),
246                    frontmatter_json,
247                    embedding,
248                    file_hash,
249                    updated_at,
250                })
251            })
252            .map_err(|e| ServiceError::Custom(format!("Failed to query skills: {}", e)))?;
253
254            let mut skills = Vec::new();
255            for skill in skill_iter {
256                skills.push(skill.map_err(|e| ServiceError::Custom(format!("Failed to parse skill: {}", e)))?);
257            }
258
259            Ok::<Vec<IndexedSkill>, ServiceError>(skills)
260        })
261        .await
262        .map_err(|e| ServiceError::Custom(format!("Database task failed: {}", e)))??;
263
264        // Calculate similarities and sort
265        let mut matches: Vec<SkillMatch> = skills
266            .into_iter()
267            .map(|skill| {
268                let similarity = Self::cosine_similarity(&query_embedding, &skill.embedding);
269                SkillMatch { skill, similarity }
270            })
271            .collect();
272
273        // Sort by similarity (highest first)
274        // Handle potential NaN values gracefully
275        matches.sort_by(|a, b| {
276            b.similarity
277                .partial_cmp(&a.similarity)
278                .unwrap_or(std::cmp::Ordering::Equal)
279        });
280
281        // Take top results
282        matches.truncate(limit);
283
284        Ok(matches)
285    }
286
287    async fn get_skill_by_id(&self, skill_id: &str) -> Result<Option<IndexedSkill>, ServiceError> {
288        self.ensure_schema().await?;
289
290        let db_path = self.db_path.clone();
291        let skill_id = skill_id.to_string();
292
293        let skill = tokio::task::spawn_blocking(move || {
294            let conn = rusqlite::Connection::open(&db_path)
295                .map_err(|e| ServiceError::Custom(format!("Failed to open database: {}", e)))?;
296
297            let mut stmt = conn
298                .prepare("SELECT id, skill_path, frontmatter_json, embedding_json, file_hash, updated_at FROM skills WHERE id = ?")
299                .map_err(|e| ServiceError::Custom(format!("Failed to prepare query: {}", e)))?;
300
301            let mut rows = stmt.query_map([skill_id], |row| {
302                let id: String = row.get(0)?;
303                let skill_path: String = row.get(1)?;
304                let frontmatter_str: String = row.get(2)?;
305                let embedding_str: String = row.get(3)?;
306                let file_hash: String = row.get(4)?;
307                let updated_at_str: String = row.get(5)?;
308
309                let frontmatter_json: serde_json::Value = serde_json::from_str(&frontmatter_str)
310                    .map_err(|e| rusqlite::Error::FromSqlConversionFailure(0, rusqlite::types::Type::Text, Box::new(e)))?;
311                let embedding: Vec<f32> = serde_json::from_str(&embedding_str)
312                    .map_err(|e| rusqlite::Error::FromSqlConversionFailure(0, rusqlite::types::Type::Text, Box::new(e)))?;
313                let updated_at = chrono::DateTime::parse_from_rfc3339(&updated_at_str)
314                    .map_err(|e| rusqlite::Error::FromSqlConversionFailure(0, rusqlite::types::Type::Text, Box::new(e)))?
315                    .with_timezone(&chrono::Utc);
316
317                Ok(IndexedSkill {
318                    id,
319                    skill_path: PathBuf::from(skill_path),
320                    frontmatter_json,
321                    embedding,
322                    file_hash,
323                    updated_at,
324                })
325            })
326            .map_err(|e| ServiceError::Custom(format!("Failed to query skill: {}", e)))?;
327
328            match rows.next() {
329                Some(result) => Ok(Some(result.map_err(|e| ServiceError::Custom(format!("Failed to parse skill: {}", e)))?)),
330                None => Ok(None),
331            }
332        })
333        .await
334        .map_err(|e| ServiceError::Custom(format!("Database task failed: {}", e)))?;
335
336        skill
337    }
338
339    async fn remove_skill(&self, skill_id: &str) -> Result<(), ServiceError> {
340        self.ensure_schema().await?;
341
342        let db_path = self.db_path.clone();
343        let skill_id = skill_id.to_string();
344
345        tokio::task::spawn_blocking(move || {
346            let conn = rusqlite::Connection::open(&db_path)
347                .map_err(|e| ServiceError::Custom(format!("Failed to open database: {}", e)))?;
348
349            conn.execute("DELETE FROM skills WHERE id = ?", [skill_id])
350                .map_err(|e| ServiceError::Custom(format!("Failed to delete skill: {}", e)))?;
351
352            Ok(())
353        })
354        .await
355        .map_err(|e| ServiceError::Custom(format!("Database task failed: {}", e)))?
356    }
357
358    async fn get_all_skills(&self) -> Result<Vec<IndexedSkill>, ServiceError> {
359        self.ensure_schema().await?;
360
361        let db_path = self.db_path.clone();
362
363        let skills = tokio::task::spawn_blocking(move || {
364            let conn = rusqlite::Connection::open(&db_path)
365                .map_err(|e| ServiceError::Custom(format!("Failed to open database: {}", e)))?;
366
367            let mut stmt = conn
368                .prepare("SELECT id, skill_path, frontmatter_json, embedding_json, file_hash, updated_at FROM skills")
369                .map_err(|e| ServiceError::Custom(format!("Failed to prepare query: {}", e)))?;
370
371            let skill_iter = stmt.query_map([], |row| {
372                let id: String = row.get(0)?;
373                let skill_path: String = row.get(1)?;
374                let frontmatter_str: String = row.get(2)?;
375                let embedding_str: String = row.get(3)?;
376                let file_hash: String = row.get(4)?;
377                let updated_at_str: String = row.get(5)?;
378
379                let frontmatter_json: serde_json::Value = serde_json::from_str(&frontmatter_str)
380                    .map_err(|e| rusqlite::Error::FromSqlConversionFailure(0, rusqlite::types::Type::Text, Box::new(e)))?;
381                let embedding: Vec<f32> = serde_json::from_str(&embedding_str)
382                    .map_err(|e| rusqlite::Error::FromSqlConversionFailure(0, rusqlite::types::Type::Text, Box::new(e)))?;
383                let updated_at = chrono::DateTime::parse_from_rfc3339(&updated_at_str)
384                    .map_err(|e| rusqlite::Error::FromSqlConversionFailure(0, rusqlite::types::Type::Text, Box::new(e)))?
385                    .with_timezone(&chrono::Utc);
386
387                Ok(IndexedSkill {
388                    id,
389                    skill_path: PathBuf::from(skill_path),
390                    frontmatter_json,
391                    embedding,
392                    file_hash,
393                    updated_at,
394                })
395            })
396            .map_err(|e| ServiceError::Custom(format!("Failed to query skills: {}", e)))?;
397
398            let mut skills = Vec::new();
399            for skill in skill_iter {
400                skills.push(skill.map_err(|e| ServiceError::Custom(format!("Failed to parse skill: {}", e)))?);
401            }
402
403            Ok::<Vec<IndexedSkill>, ServiceError>(skills)
404        })
405        .await
406        .map_err(|e| ServiceError::Custom(format!("Database task failed: {}", e)))??;
407
408        Ok(skills)
409    }
410}