vyctor 0.1.0

A fast CLI tool for semantic file search using vector embeddings
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
//! Integration tests for DuckDB storage layer

mod common;

use tempfile::tempdir;
#[allow(unused_imports)]
use vyctor::storage::SearchResult;
use vyctor::storage::Storage;

/// Builds a file-backed [`Storage`] inside a fresh temporary directory.
///
/// Returns `None` when `common::vss_available()` reports `false`, or when
/// `Storage::new` fails. In the failure case the error is printed to stderr
/// so that a skipped test can be told apart from a missing extension.
///
/// The temporary directory is deliberately never removed: its destructor is
/// skipped so the database file remains on disk for as long as the returned
/// storage is used by the calling test.
fn create_test_storage(dimensions: usize) -> Option<Storage> {
    if !common::vss_available() {
        return None;
    }

    let dir = tempdir().expect("Failed to create temp directory");
    let db_path = dir.path().join("test.duckdb");

    // Skip the `TempDir` destructor (which would delete the directory) so the
    // database path stays valid after this function returns.
    std::mem::forget(dir);

    match Storage::new(&db_path, dimensions) {
        Ok(storage) => Some(storage),
        Err(e) => {
            // Surface the reason instead of silently mapping it to `None`.
            eprintln!("Storage creation failed: {}", e);
            None
        }
    }
}

/// Opens a file-backed storage in a temporary directory.
///
/// A creation error is reported on stderr and treated as a skip rather than
/// a failure.
#[test]
fn test_storage_creation() {
    // Without the VSS extension there is nothing to exercise.
    if !common::vss_available() {
        eprintln!("Skipping test: VSS extension not available");
        return;
    }

    let tmp = tempdir().unwrap();
    let db_file = tmp.path().join("test.duckdb");

    if let Err(err) = Storage::new(&db_file, 64) {
        eprintln!("Skipping test: Storage creation failed: {}", err);
    }
}

/// Opens an in-memory storage.
///
/// A creation error is reported on stderr and treated as a skip rather than
/// a failure.
#[test]
fn test_in_memory_storage() {
    // Without the VSS extension there is nothing to exercise.
    if !common::vss_available() {
        eprintln!("Skipping test: VSS extension not available");
        return;
    }

    if let Err(err) = Storage::in_memory(64) {
        eprintln!("Skipping test: In-memory storage creation failed: {}", err);
    }
}

/// Inserts a file record and reads it back by path.
#[test]
fn test_file_upsert_and_get() {
    let Some(storage) = create_test_storage(64) else {
        eprintln!("Skipping test: VSS extension not available");
        return;
    };

    // A newly inserted file is assigned a positive id.
    let new_id = storage.upsert_file("src/main.rs", "hash123").unwrap();
    assert!(new_id > 0);

    // Looking the same path up must return the record that was just stored.
    let lookup = storage.get_file("src/main.rs").unwrap();
    assert!(lookup.is_some());

    let record = lookup.unwrap();
    assert_eq!(record.path, "src/main.rs");
    assert_eq!(record.content_hash, "hash123");
}

/// Upserting an existing path keeps its id and replaces the stored hash.
#[test]
fn test_file_update() {
    let Some(storage) = create_test_storage(64) else {
        eprintln!("Skipping test: VSS extension not available");
        return;
    };

    // First upsert creates the record; the second targets the same path
    // with a different content hash.
    let first_id = storage.upsert_file("file.rs", "hash1").unwrap();
    let second_id = storage.upsert_file("file.rs", "hash2").unwrap();

    // The id must be stable across the update.
    assert_eq!(first_id, second_id);

    // The record must now carry the newer hash.
    let stored = storage.get_file("file.rs").unwrap().unwrap();
    assert_eq!(stored.content_hash, "hash2");
}

/// Deletes a file once, then confirms a second delete reports `false`.
#[test]
fn test_file_delete() {
    let Some(storage) = create_test_storage(64) else {
        eprintln!("Skipping test: VSS extension not available");
        return;
    };

    // Seed the record and make sure it is visible before deleting.
    storage.upsert_file("to_delete.rs", "hash").unwrap();
    let present_before = storage.get_file("to_delete.rs").unwrap().is_some();
    assert!(present_before);

    // The first delete removes the record and reports success.
    let first_delete = storage.delete_file("to_delete.rs").unwrap();
    assert!(first_delete);

    let present_after = storage.get_file("to_delete.rs").unwrap().is_some();
    assert!(!present_after);

    // With nothing left to remove, the second delete reports `false`.
    let second_delete = storage.delete_file("to_delete.rs").unwrap();
    assert!(!second_delete);
}

/// Deletes a subset of files in one call and checks that exactly that subset
/// is gone while the remaining file is untouched.
#[test]
fn test_delete_multiple_files() {
    let storage = match create_test_storage(64) {
        Some(s) => s,
        None => {
            eprintln!("Skipping test: VSS extension not available");
            return;
        }
    };

    // Insert multiple files
    storage.upsert_file("file1.rs", "hash1").unwrap();
    storage.upsert_file("file2.rs", "hash2").unwrap();
    storage.upsert_file("file3.rs", "hash3").unwrap();

    // Delete two of them
    let paths = vec!["file1.rs".to_string(), "file3.rs".to_string()];
    let deleted = storage.delete_files(&paths).unwrap();
    assert_eq!(deleted, 2);

    // The file that was not listed must survive.
    assert!(storage.get_file("file2.rs").unwrap().is_some());

    // Both listed files must be gone, not just the first one.
    assert!(storage.get_file("file1.rs").unwrap().is_none());
    assert!(storage.get_file("file3.rs").unwrap().is_none());
}

/// Lists every stored path and checks that all inserted ones are present.
#[test]
fn test_get_all_file_paths() {
    let Some(storage) = create_test_storage(64) else {
        eprintln!("Skipping test: VSS extension not available");
        return;
    };

    // Seed three files in different directories.
    storage.upsert_file("src/main.rs", "hash1").unwrap();
    storage.upsert_file("src/lib.rs", "hash2").unwrap();
    storage.upsert_file("tests/test.rs", "hash3").unwrap();

    let paths = storage.get_all_file_paths().unwrap();
    assert_eq!(paths.len(), 3);

    // Membership is checked per path; no ordering is assumed.
    for expected in ["src/main.rs", "src/lib.rs", "tests/test.rs"] {
        assert!(paths.contains(&expected.to_string()));
    }
}

/// Inserts a single chunk for a file and checks the returned id is positive.
#[test]
fn test_chunk_insert() {
    let Some(storage) = create_test_storage(64) else {
        eprintln!("Skipping test: VSS extension not available");
        return;
    };

    // The chunk is attached to this file's id.
    let owner_id = storage.upsert_file("test.rs", "hash").unwrap();

    // Dummy 64-element embedding with values i / 64.
    let mut embedding: Vec<f32> = Vec::with_capacity(64);
    for i in 0..64 {
        embedding.push(i as f32 / 64.0);
    }

    let inserted = storage.insert_chunk(owner_id, 0, "fn main() {}", 1, 3, &embedding);
    let chunk_id = inserted.unwrap();

    assert!(chunk_id > 0);
}

/// Inserts five chunks in one batch and checks the reported chunk count.
#[test]
fn test_chunk_batch_insert() {
    let Some(storage) = create_test_storage(64) else {
        eprintln!("Skipping test: VSS extension not available");
        return;
    };

    let owner_id = storage.upsert_file("test.rs", "hash").unwrap();

    // Build five chunks; each embedding holds 64 values derived from the
    // chunk index so that no two chunks share the same vector.
    let mut batch: Vec<(i32, String, i32, i32, Vec<f32>)> = Vec::with_capacity(5);
    for i in 0..5i32 {
        let vector: Vec<f32> = (0..64).map(|j| (i * 64 + j) as f32 / 320.0).collect();
        let text = format!("chunk {}", i);
        batch.push((i, text, i * 10, (i + 1) * 10, vector));
    }

    storage.insert_chunks_batch(owner_id, &batch).unwrap();

    let chunk_total = storage.get_stats().unwrap().chunk_count;
    assert_eq!(chunk_total, 5);
}

/// Inserts three chunks with mutually orthogonal one-hot embeddings and
/// checks that a query equal to chunk 0's embedding ranks chunk 0 first.
#[test]
fn test_search() {
    let storage = match create_test_storage(64) {
        Some(s) => s,
        None => {
            eprintln!("Skipping test: VSS extension not available");
            return;
        }
    };

    // Insert file and chunks
    let file_id = storage.upsert_file("test.rs", "hash").unwrap();

    // Chunk `i` gets a one-hot embedding with the 1.0 at position `i`, so
    // every chunk is equally far from every other chunk.
    for i in 0..3i32 {
        let embedding: Vec<f32> = (0..64).map(|j| if j == i { 1.0 } else { 0.0 }).collect();

        storage
            .insert_chunk(
                file_id,
                i,
                &format!("chunk content {}", i),
                i * 10,
                (i + 1) * 10,
                &embedding,
            )
            .unwrap();
    }

    // The query is identical to chunk 0's embedding.
    let query: Vec<f32> = (0..64).map(|j| if j == 0 { 1.0 } else { 0.0 }).collect();

    let results = storage.search(&query, 3, None).unwrap();

    assert!(!results.is_empty());
    // The top hit must be chunk 0 specifically; matching only the shared
    // "chunk content" prefix would pass for any ordering.
    assert!(
        results[0].chunk_content.contains("chunk content 0"),
        "expected chunk 0 to be ranked first"
    );
}

/// Searches with a folder filter and checks only the matching file is returned.
#[test]
fn test_search_with_folder_filter() {
    let Some(storage) = create_test_storage(64) else {
        eprintln!("Skipping test: VSS extension not available");
        return;
    };

    // Two files in different top-level folders.
    let src_id = storage.upsert_file("src/main.rs", "hash1").unwrap();
    let tests_id = storage.upsert_file("tests/test.rs", "hash2").unwrap();

    // Both chunks share one embedding, so only the filter can separate them.
    let embedding: Vec<f32> = (0..64).map(|i| i as f32 / 64.0).collect();

    for (file_id, content) in [(src_id, "main content"), (tests_id, "test content")] {
        storage
            .insert_chunk(file_id, 0, content, 1, 10, &embedding)
            .unwrap();
    }

    // Restrict the search to src/.
    let hits = storage.search(&embedding, 10, Some("src/")).unwrap();

    assert_eq!(hits.len(), 1);
    assert_eq!(hits[0].file_path, "src/main.rs");
}

/// A freshly created storage reports zero for every statistic.
#[test]
fn test_stats_empty_database() {
    let Some(storage) = create_test_storage(64) else {
        eprintln!("Skipping test: VSS extension not available");
        return;
    };

    // Nothing has been inserted, so all counters must be zero.
    let empty = storage.get_stats().unwrap();
    assert_eq!(empty.file_count, 0);
    assert_eq!(empty.chunk_count, 0);
    assert_eq!(empty.total_content_size, 0);
}

/// Statistics reflect one file holding two chunks and their combined size.
#[test]
fn test_stats_with_data() {
    let Some(storage) = create_test_storage(64) else {
        eprintln!("Skipping test: VSS extension not available");
        return;
    };

    let owner_id = storage.upsert_file("test.rs", "hash").unwrap();
    let zeros: Vec<f32> = vec![0.0; 64];

    // Two chunks whose contents are 11 and 7 bytes long.
    let first = storage.insert_chunk(owner_id, 0, "hello world", 1, 1, &zeros);
    first.unwrap();
    let second = storage.insert_chunk(owner_id, 1, "goodbye", 2, 2, &zeros);
    second.unwrap();

    let totals = storage.get_stats().unwrap();
    assert_eq!(totals.file_count, 1);
    assert_eq!(totals.chunk_count, 2);
    // 11 ("hello world") + 7 ("goodbye")
    assert_eq!(totals.total_content_size, 18);
}

/// `clear` leaves the storage with no files and no chunks.
#[test]
fn test_clear() {
    let Some(storage) = create_test_storage(64) else {
        eprintln!("Skipping test: VSS extension not available");
        return;
    };

    // Seed one file with one chunk.
    let owner_id = storage.upsert_file("test.rs", "hash").unwrap();
    let zeros: Vec<f32> = vec![0.0; 64];
    storage
        .insert_chunk(owner_id, 0, "content", 1, 1, &zeros)
        .unwrap();

    // The seeded file must be counted before clearing.
    let before = storage.get_stats().unwrap();
    assert_eq!(before.file_count, 1);

    storage.clear().unwrap();

    // Both counters must be back to zero afterwards.
    let after = storage.get_stats().unwrap();
    assert_eq!(after.file_count, 0);
    assert_eq!(after.chunk_count, 0);
}

/// Re-upserting a file with a new hash removes its previously stored chunks.
#[test]
fn test_file_update_clears_chunks() {
    let Some(storage) = create_test_storage(64) else {
        eprintln!("Skipping test: VSS extension not available");
        return;
    };

    // Store a file together with a single chunk.
    let owner_id = storage.upsert_file("test.rs", "hash1").unwrap();
    let zeros: Vec<f32> = vec![0.0; 64];
    storage
        .insert_chunk(owner_id, 0, "old content", 1, 1, &zeros)
        .unwrap();

    let chunks_before = storage.get_stats().unwrap().chunk_count;
    assert_eq!(chunks_before, 1);

    // Upsert the same path with a different hash.
    storage.upsert_file("test.rs", "hash2").unwrap();

    // The old chunk is expected to be gone.
    let chunks_after = storage.get_stats().unwrap().chunk_count;
    assert_eq!(chunks_after, 0);
}

/// `needs_rebuild` is false for an empty store and for matching dimensions,
/// and true once stored data disagrees with the requested dimensions.
#[test]
fn test_needs_rebuild() {
    let Some(storage) = create_test_storage(64) else {
        eprintln!("Skipping test: VSS extension not available");
        return;
    };

    // With no data stored, a different dimension count needs no rebuild.
    let empty_mismatch = storage.needs_rebuild(128).unwrap();
    assert!(!empty_mismatch);

    // Store one 64-dimensional chunk.
    let owner_id = storage.upsert_file("test.rs", "hash").unwrap();
    let zeros: Vec<f32> = vec![0.0; 64];
    storage
        .insert_chunk(owner_id, 0, "content", 1, 1, &zeros)
        .unwrap();

    // Matching dimensions: no rebuild.
    let same_dims = storage.needs_rebuild(64).unwrap();
    assert!(!same_dims);

    // Mismatched dimensions with data present: rebuild required.
    let other_dims = storage.needs_rebuild(128).unwrap();
    assert!(other_dims);
}