1use crate::catalog::{Catalog, db_err};
11use orbok_core::{ChunkId, EmbeddingId, FileId, ModelId, OrbokResult, now_iso8601};
12use rusqlite::params;
13
14pub struct NewEmbedding {
16 pub chunk_id: ChunkId,
17 pub model_id: ModelId,
18 pub dimension: u32,
19 pub vector: Vec<f32>,
21}
22
23#[derive(Debug, Clone)]
25pub struct EmbeddingRecord {
26 pub embedding_id: EmbeddingId,
27 pub chunk_id: ChunkId,
28 pub file_id: FileId,
29 pub vector: Vec<f32>,
30}
31
32pub struct EmbeddingRepository<'a> {
33 catalog: &'a Catalog,
34}
35
36impl<'a> EmbeddingRepository<'a> {
37 pub fn new(catalog: &'a Catalog) -> Self {
38 Self { catalog }
39 }
40
41 pub fn upsert(&self, new: &NewEmbedding) -> OrbokResult<()> {
43 let id = EmbeddingId::generate();
44 let now = now_iso8601();
45 let blob = orbok_models::vec_to_blob(&new.vector);
46 let conn = self.catalog.lock();
47 conn.execute(
48 "INSERT INTO embeddings \
49 (embedding_id, chunk_id, model_id, vector_format, dimension, norm, \
50 storage_location, vector_blob, status, created_at, updated_at) \
51 VALUES (?1,?2,?3,'fp32',?4,'l2','sqlite_blob',?5,'active',?6,?6) \
52 ON CONFLICT(chunk_id, model_id, vector_format) DO UPDATE SET \
53 vector_blob=?5, status='active', updated_at=?6",
54 params![
55 id.as_str(),
56 new.chunk_id.as_str(),
57 new.model_id.as_str(),
58 new.dimension as i64,
59 blob,
60 now,
61 ],
62 )
63 .map_err(db_err)?;
64 Ok(())
65 }
66
67 pub fn list_active_for_scan(
71 &self,
72 model_id: &str,
73 dimension: u32,
74 ) -> OrbokResult<Vec<EmbeddingRecord>> {
75 let conn = self.catalog.lock();
76 let mut stmt = conn
77 .prepare(
78 "SELECT e.embedding_id, e.chunk_id, c.file_id, e.vector_blob \
79 FROM embeddings e \
80 JOIN chunks c ON c.chunk_id = e.chunk_id \
81 WHERE e.model_id = ?1 AND e.dimension = ?2 \
82 AND e.status = 'active' AND c.chunk_status = 'active'",
83 )
84 .map_err(db_err)?;
85 let rows = stmt
86 .query_map(params![model_id, dimension as i64], |row| {
87 Ok((
88 row.get::<_, String>(0)?,
89 row.get::<_, String>(1)?,
90 row.get::<_, String>(2)?,
91 row.get::<_, Vec<u8>>(3)?,
92 ))
93 })
94 .map_err(db_err)?;
95 let mut out = Vec::new();
96 for row in rows {
97 let (emb_id, chunk_id, file_id, blob) = row.map_err(db_err)?;
98 let vector = orbok_models::blob_to_vec(&blob, dimension).unwrap_or_default();
99 out.push(EmbeddingRecord {
100 embedding_id: EmbeddingId::from_string(emb_id),
101 chunk_id: ChunkId::from_string(chunk_id),
102 file_id: FileId::from_string(file_id),
103 vector,
104 });
105 }
106 Ok(out)
107 }
108
109 pub fn mark_stale_for_model(&self, model_id: &str) -> OrbokResult<u64> {
111 let conn = self.catalog.lock();
112 let n = conn
113 .execute(
114 "UPDATE embeddings SET status='stale', updated_at=?2 WHERE model_id=?1",
115 params![model_id, now_iso8601()],
116 )
117 .map_err(db_err)?;
118 Ok(n as u64)
119 }
120
121 pub fn count_active(&self, model_id: &str) -> OrbokResult<u64> {
123 let conn = self.catalog.lock();
124 let n: i64 = conn
125 .query_row(
126 "SELECT COUNT(*) FROM embeddings e \
127 JOIN chunks c ON c.chunk_id = e.chunk_id \
128 WHERE e.model_id=?1 AND e.status='active' AND c.chunk_status='active'",
129 params![model_id],
130 |r| r.get(0),
131 )
132 .map_err(db_err)?;
133 Ok(n as u64)
134 }
135 pub fn upsert_i8(
137 &self,
138 chunk_id: &orbok_core::ChunkId,
139 model_id: &orbok_core::ModelId,
140 dimension: u32,
141 i8_vector: &[i8],
142 ) -> OrbokResult<()> {
143 let id = orbok_core::EmbeddingId::generate();
144 let now = orbok_core::now_iso8601();
145 let blob: Vec<u8> = i8_vector.iter().map(|&b| b as u8).collect();
146 let conn = self.catalog.lock();
147 conn.execute(
148 "INSERT INTO embeddings \
149 (embedding_id, chunk_id, model_id, vector_format, dimension, norm, \
150 storage_location, vector_blob, status, created_at, updated_at) \
151 VALUES (?1,?2,?3,'int8',?4,'l2','sqlite_blob',?5,'active',?6,?6) \
152 ON CONFLICT(chunk_id, model_id, vector_format) DO UPDATE SET \
153 vector_blob=?5, status='active', updated_at=?6",
154 rusqlite::params![
155 id.as_str(),
156 chunk_id.as_str(),
157 model_id.as_str(),
158 dimension as i64,
159 blob,
160 now
161 ],
162 )
163 .map_err(crate::catalog::db_err)?;
164 Ok(())
165 }
166}