sqry-core 7.1.4

Core library for sqry - semantic code search engine
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
//! Integration tests for Project persistence
//!
//! Tests the full persist + reload cycle as specified in:
//! - `docs/development/project-persistence-019af0c9-428d-7000-b1d6-08961d6930b0/05_TEST_PLAN.md`

use sqry_core::config::{CacheConfig, IndexingConfig};
use sqry_core::project::Project;
use sqry_core::project::persistence::{
    ProjectPersistence, build_persisted_state, compute_config_fingerprint,
};
use sqry_core::project::types::ProjectId;
use std::collections::HashMap;
use std::fs;
use tempfile::TempDir;

/// FR-PERSIST-1, FR-PERSIST-2, FR-PERSIST-4: persist/reload round trip.
///
/// Initializes a project rooted in a temp directory that contains a `.git`
/// folder, persists it, drops it, then builds a second project on the same
/// root and checks that it reports the same repo count.
#[test]
fn test_persist_and_reload_happy_path() {
    let workspace = TempDir::new().unwrap();
    let root = workspace.path().to_path_buf();

    // The `.git` directory is what lets the root be detected as a repo.
    fs::create_dir_all(root.join(".git")).unwrap();

    // First session. No files are registered by hand; only the state produced
    // by `initialize()` is persisted.
    let first = Project::new(root.clone()).unwrap();
    first.initialize().unwrap();

    assert!(first.repo_count() >= 1, "Repo should be detected");
    let expected_repos = first.repo_count();

    first.persist_if_configured();

    // The state file is named after the project id, formatted as 16 hex digits.
    let state_file = format!("proj_{:016x}.json", first.id.as_u64());
    let state_path = root
        .join(".sqry-cache")
        .join("project-state")
        .join(state_file);
    assert!(
        state_path.exists(),
        "Persisted state file should exist at {state_path:?}"
    );

    // End the first session before starting the second one.
    drop(first);

    // Second session on the same root.
    let second = Project::new(root.clone()).unwrap();
    second.initialize().unwrap();

    assert_eq!(
        second.repo_count(),
        expected_repos,
        "Repo count should match after preload"
    );
}

/// FR-PERSIST-3: opt-out.
///
/// Builds a `CacheConfig` with `persistent: false` and checks that no
/// `project-state` directory exists under the cache directory.
///
/// NOTE(review): nothing in this test calls a persist method, so the directory
/// check runs against an untouched temp dir. Confirm whether a persist call
/// using this config was intended.
#[test]
fn test_opt_out_no_files_created() {
    let workspace = TempDir::new().unwrap();
    let index_root = workspace.path().to_path_buf();

    // The persistence helper and project id are built directly instead of
    // going through `Project`; neither value is used afterwards.
    let _persistence = ProjectPersistence::new(&index_root, ".sqry-cache");
    let _project_id = ProjectId::from_index_root(&index_root);

    let cache = CacheConfig {
        persistent: false,
        directory: String::from(".sqry-cache"),
    };

    // Sanity check on the config that was just built.
    assert!(!cache.persistent);

    let state_dir = index_root.join(".sqry-cache").join("project-state");
    let state_dir_present = state_dir.exists();
    assert!(
        !state_dir_present,
        "State directory should not exist before any persist"
    );
}

/// FR-PERSIST-3: Read-only path test
///
/// With the cache directory set to mode `0o500`, `write_metadata` is expected
/// to return an error rather than panic.
///
/// The write result is captured first and asserted only after the directory
/// permissions have been restored, so a failing assertion does not leave a
/// read-only directory behind for temp-dir cleanup.
///
/// NOTE(review): a privileged user (e.g. root) can typically write despite the
/// mode bits, in which case the final assertion fails. Confirm how CI runs
/// this test.
#[test]
#[cfg(unix)]
fn test_read_only_path_warns_no_panic() {
    use std::os::unix::fs::PermissionsExt;

    let tmp = TempDir::new().unwrap();
    let index_root = tmp.path().to_path_buf();
    let cache_dir = index_root.join(".sqry-cache");

    // Create cache directory and make it read-only
    fs::create_dir_all(&cache_dir).unwrap();
    let mut perms = fs::metadata(&cache_dir).unwrap().permissions();
    perms.set_mode(0o500); // read + execute only
    fs::set_permissions(&cache_dir, perms).unwrap();

    let persistence = ProjectPersistence::new(&index_root, ".sqry-cache");
    let project_id = ProjectId::from_index_root(&index_root);

    let state = build_persisted_state(
        project_id,
        &index_root,
        12345,
        &HashMap::new(),
        &HashMap::new(),
    );

    // This should fail but not panic. Do not assert yet: permissions must be
    // restored first.
    let result = persistence.write_metadata(&state);

    // Restore permissions for cleanup, regardless of the write outcome.
    let mut perms = fs::metadata(&cache_dir).unwrap().permissions();
    perms.set_mode(0o700);
    fs::set_permissions(&cache_dir, perms).unwrap();

    assert!(result.is_err(), "Write should fail on read-only directory");
}

/// FR-PERSIST-4: Checksum/version mismatch test
///
/// Writes a valid state file, corrupts the text around its `"checksum":` key,
/// and verifies that `read_metadata` does not return the state (it must yield
/// either an error or `Ok(None)`).
#[test]
fn test_checksum_mismatch_falls_back() {
    let tmp = TempDir::new().unwrap();
    let index_root = tmp.path().to_path_buf();

    let persistence = ProjectPersistence::new(&index_root, ".sqry-cache");
    let project_id = ProjectId::from_index_root(&index_root);

    // Create a valid state
    let state = build_persisted_state(
        project_id,
        &index_root,
        12345,
        &HashMap::new(),
        &HashMap::new(),
    );

    // Write it
    persistence.write_metadata(&state).unwrap();

    let state_path = index_root
        .join(".sqry-cache")
        .join("project-state")
        .join(format!("proj_{:016x}.json", project_id.as_u64()));

    let original = fs::read_to_string(&state_path).unwrap();

    // Corrupt the file by injecting an extra `"corrupted` fragment right after
    // the checksum key. This presumably leaves the JSON malformed rather than
    // merely changing the checksum value. Verify if a pure checksum mismatch
    // needs separate coverage.
    let corrupted = original.replace("\"checksum\":", "\"checksum\": \"corrupted");

    // Guard against a silent no-op: if the key is not serialized in this exact
    // form, the file would be rewritten unchanged.
    assert_ne!(
        original, corrupted,
        "State file should contain a checksum key to corrupt"
    );
    fs::write(&state_path, corrupted).unwrap();

    // Try to read - anything other than a successfully loaded state is accepted
    let result = persistence.read_metadata(project_id);
    assert!(
        !matches!(result, Ok(Some(_))),
        "Corrupted state should fail to load"
    );
}

/// NFR-4: concurrent persistence.
///
/// Spawns four threads. Each one creates its own temp root with a `.git`
/// directory, initializes a project there, persists it, and checks that the
/// `project-state` directory was created. The test fails if any thread panics.
#[test]
fn test_concurrent_persist_separate_roots() {
    use std::thread;

    let mut workers = Vec::new();
    for i in 0..4 {
        let worker = thread::spawn(move || {
            // Every thread owns a private root, so no paths are shared.
            let tmp = TempDir::new().unwrap();
            let index_root = tmp.path().to_path_buf();

            // `.git` lets the root be detected as a repo.
            fs::create_dir_all(index_root.join(".git")).unwrap();

            let project = Project::new(index_root.clone()).unwrap();
            project.initialize().unwrap();
            project.persist_if_configured();

            // Persisting should have produced the state directory.
            let state_dir = index_root.join(".sqry-cache").join("project-state");
            assert!(
                state_dir.exists(),
                "Thread {i}: State directory should exist"
            );
        });
        workers.push(worker);
    }

    // Join in spawn order and surface any worker panic with its index.
    for (i, worker) in workers.into_iter().enumerate() {
        if worker.join().is_err() {
            panic!("Thread {i} panicked");
        }
    }
}

/// A relative custom cache directory is honored.
///
/// Writes metadata through a `ProjectPersistence` configured with
/// `custom-cache` and checks that the state file appears under
/// `<index_root>/custom-cache/project-state/`.
#[test]
fn test_custom_cache_directory_relative() {
    let workspace = TempDir::new().unwrap();
    let index_root = workspace.path().to_path_buf();

    let project_id = ProjectId::from_index_root(&index_root);
    let state = build_persisted_state(
        project_id,
        &index_root,
        12345,
        &HashMap::new(),
        &HashMap::new(),
    );

    let persistence = ProjectPersistence::new(&index_root, "custom-cache");
    persistence.write_metadata(&state).unwrap();

    // The state file name is derived from the project id.
    let state_file = format!("proj_{:016x}.json", project_id.as_u64());
    let expected_location = index_root
        .join("custom-cache")
        .join("project-state")
        .join(state_file);
    assert!(
        expected_location.exists(),
        "State should be in custom cache directory"
    );
}

/// Absolute cache directory paths must not be honored.
///
/// Configures persistence with the absolute path of a second temp directory,
/// writes metadata, and checks that the state file is absent from that
/// directory and present under the default `.sqry-cache` inside the index root.
#[test]
fn test_absolute_cache_directory_rejected() {
    let workspace = TempDir::new().unwrap();
    let index_root = workspace.path().to_path_buf();

    // A second temp dir supplies an absolute path outside the index root.
    let outside = TempDir::new().unwrap();
    let cache_dir = outside.path().to_string_lossy().into_owned();

    let persistence = ProjectPersistence::new(&index_root, &cache_dir);
    let project_id = ProjectId::from_index_root(&index_root);

    let state = build_persisted_state(
        project_id,
        &index_root,
        12345,
        &HashMap::new(),
        &HashMap::new(),
    );
    persistence.write_metadata(&state).unwrap();

    // Both candidate locations share the same relative tail.
    let state_file = format!("proj_{:016x}.json", project_id.as_u64());

    // The file must not have been written to the absolute location.
    let escaped_location = outside.path().join("project-state").join(&state_file);
    assert!(
        !escaped_location.exists(),
        "Absolute path should be rejected; state should NOT escape project"
    );

    // It must be in the default cache directory under the index root instead.
    let default_location = index_root
        .join(".sqry-cache")
        .join("project-state")
        .join(&state_file);
    assert!(
        default_location.exists(),
        "State should be in default cache directory when absolute path rejected"
    );
}

/// The config fingerprint changes when the cache directory changes.
///
/// Compares the fingerprint of the default configs against one computed with
/// only `CacheConfig::directory` altered; the two must differ.
#[test]
fn test_config_fingerprint_invalidates_on_change() {
    // Baseline: everything at its default.
    let baseline_cache = CacheConfig::default();
    let baseline_indexing = IndexingConfig::default();
    let baseline_fp = compute_config_fingerprint(&baseline_cache, &baseline_indexing);

    // Variant: same indexing config, different cache directory.
    let relocated_cache = CacheConfig {
        directory: String::from(".other-cache"),
        ..CacheConfig::default()
    };
    let unchanged_indexing = IndexingConfig::default();
    let relocated_fp = compute_config_fingerprint(&relocated_cache, &unchanged_indexing);

    assert_ne!(
        baseline_fp, relocated_fp,
        "Fingerprint should change when config changes"
    );
}

/// FR-PERSIST-2: File table fidelity test (MEDIUM finding from review)
///
/// Registers two file entries with explicit metadata, persists, reloads into a
/// fresh `Project`, and verifies that `repo_id`, `content_hash`, and
/// `language_id` survive the persist/reload cycle.
///
/// Both entries are required to be present after reload. A missing entry fails
/// the test instead of silently skipping the fidelity assertions.
#[test]
fn test_file_table_fidelity_integration() {
    use sqry_core::project::types::{FileEntry, StringId};
    use std::sync::Arc;
    use std::time::SystemTime;

    let tmp = TempDir::new().unwrap();
    let index_root = tmp.path().to_path_buf();

    // Create .git for repo detection
    fs::create_dir_all(index_root.join(".git")).unwrap();

    // Create test files (create directory first!)
    fs::create_dir_all(index_root.join("src")).unwrap();
    fs::write(index_root.join("src/main.rs"), "fn main() {}").unwrap();
    fs::write(index_root.join("src/lib.rs"), "pub mod foo;").unwrap();

    // Create project and initialize
    let project = Project::new(index_root.clone()).unwrap();
    project.initialize().unwrap();

    // Get the detected repo
    let repo_index = project.repo_index();
    assert!(!repo_index.is_empty(), "Repo should be detected");

    // Register files with full metadata, attached to the first detected repo
    let repo_id = *repo_index.values().next().unwrap();
    let path1: StringId = Arc::from("src/main.rs");
    let path2: StringId = Arc::from("src/lib.rs");

    let entry1 = FileEntry::with_metadata(
        Arc::clone(&path1),
        repo_id,
        Some(0xdead_beef_cafe_babe),
        Some(SystemTime::now()),
        Some(Arc::from("rust")),
    );

    let entry2 = FileEntry::with_metadata(
        Arc::clone(&path2),
        repo_id,
        Some(0x1234_5678_9abc_def0),
        Some(SystemTime::now()),
        Some(Arc::from("rust")),
    );

    project.register_file(entry1);
    project.register_file(entry2);

    // Persist
    project.persist_if_configured();

    // Drop and recreate
    drop(project);

    let project2 = Project::new(index_root.clone()).unwrap();
    project2.initialize().unwrap();

    // Verify file count
    assert!(
        project2.file_count() >= 2,
        "Should have at least 2 files after preload"
    );

    // The registered entries must exist after reload; otherwise the metadata
    // checks below would never run.
    let main_file = project2
        .get_file("src/main.rs")
        .expect("src/main.rs should be present after preload");
    assert!(
        main_file.repo_id.is_some(),
        "RepoId should be restored (not NONE)"
    );
    assert_eq!(
        main_file.content_hash,
        Some(0xdead_beef_cafe_babe),
        "content_hash should be preserved"
    );
    assert_eq!(
        main_file.language_id.as_deref(),
        Some("rust"),
        "language_id should be preserved"
    );

    let lib_file = project2
        .get_file("src/lib.rs")
        .expect("src/lib.rs should be present after preload");
    assert!(
        lib_file.repo_id.is_some(),
        "RepoId should be restored (not NONE)"
    );
    assert_eq!(
        lib_file.content_hash,
        Some(0x1234_5678_9abc_def0),
        "content_hash should be preserved"
    );
    assert_eq!(
        lib_file.language_id.as_deref(),
        Some("rust"),
        "language_id should be preserved"
    );
}

/// FR-PERSIST-2: Verify `RepoId` survives persist/reload
///
/// Registers two files against the repo detected at the index root, persists,
/// reloads into a fresh `Project`, and checks that every repo keeps its
/// `RepoId` and that both files still point at the main repo.
///
/// Repo and file lookups after reload are mandatory. A missing entry fails the
/// test instead of silently skipping the associated assertions.
#[test]
fn test_repo_id_fidelity() {
    use sqry_core::project::types::{FileEntry, StringId};
    use std::sync::Arc;

    let tmp = TempDir::new().unwrap();
    // Canonicalize so the path matches what detect_repos_under() stores
    // (on macOS, /var → /private/var symlink causes mismatch otherwise)
    let index_root = tmp.path().canonicalize().unwrap();

    // Create main .git
    fs::create_dir_all(index_root.join(".git")).unwrap();

    // Create file in main repo
    fs::create_dir_all(index_root.join("src")).unwrap();
    fs::write(index_root.join("src/main.rs"), "fn main() {}").unwrap();
    fs::write(index_root.join("src/lib.rs"), "pub mod foo;").unwrap();

    // Create project and initialize
    let project = Project::new(index_root.clone()).unwrap();
    project.initialize().unwrap();

    // Should detect at least 1 repo
    let repo_index = project.repo_index();
    assert!(!repo_index.is_empty(), "Should detect at least 1 repo");

    // Get the main repo's RepoId
    let main_repo_id = *repo_index.get(&index_root).expect("main repo should exist");

    // Register files
    let path1: StringId = Arc::from("src/main.rs");
    let path2: StringId = Arc::from("src/lib.rs");
    let entry1 = FileEntry::new(Arc::clone(&path1), main_repo_id);
    let entry2 = FileEntry::new(Arc::clone(&path2), main_repo_id);
    project.register_file(entry1);
    project.register_file(entry2);

    // Persist
    project.persist_if_configured();

    // Drop and recreate
    drop(project);

    let project2 = Project::new(index_root.clone()).unwrap();
    project2.initialize().unwrap();

    // Verify repos are restored
    let repo_index2 = project2.repo_index();
    assert_eq!(
        repo_index.len(),
        repo_index2.len(),
        "Repo count should match after preload"
    );

    // Verify each repo has correct RepoId
    for (git_root, original_id) in &repo_index {
        let restored_id = repo_index2
            .get(git_root)
            .unwrap_or_else(|| panic!("Repo {git_root:?} should exist after preload"));
        assert_eq!(
            restored_id, original_id,
            "RepoId should match for {git_root:?}"
        );
    }

    // Verify files have correct repo associations (not RepoId::NONE).
    // The entries must exist; otherwise these checks would never run.
    let main_file = project2
        .get_file("src/main.rs")
        .expect("src/main.rs should be present after preload");
    assert!(
        main_file.repo_id.is_some(),
        "main.rs RepoId should not be NONE"
    );
    assert_eq!(
        main_file.repo_id, main_repo_id,
        "main.rs should belong to main repo"
    );

    let lib_file = project2
        .get_file("src/lib.rs")
        .expect("src/lib.rs should be present after preload");
    assert!(
        lib_file.repo_id.is_some(),
        "lib.rs RepoId should not be NONE"
    );
    assert_eq!(
        lib_file.repo_id, main_repo_id,
        "lib.rs should belong to main repo"
    );
}

/// Persisted state stays inside the index root.
///
/// The index root is a `workspace` subdirectory of a temp dir, so its parent
/// is also owned by this test. After persisting with the default cache
/// directory, `project-state` must exist under the index root and no
/// `.sqry-cache` may appear in the parent.
#[test]
fn test_path_traversal_protection() {
    // Nesting the root one level down keeps the parent directory private to
    // this test, so a stray `.sqry-cache` from another run cannot interfere.
    let sandbox = TempDir::new().unwrap();
    let index_root = sandbox.path().join("workspace");
    fs::create_dir_all(&index_root).unwrap();

    // `.git` lets the root be detected as a repo.
    fs::create_dir_all(index_root.join(".git")).unwrap();

    // No custom configuration is supplied: the default `.sqry-cache` is used.
    let project = Project::new(index_root.clone()).unwrap();
    project.initialize().unwrap();
    project.persist_if_configured();

    // State must be located under the index root.
    let expected_state_dir = index_root.join(".sqry-cache").join("project-state");
    assert!(
        expected_state_dir.exists(),
        "State directory should be under index_root"
    );

    // Nothing may have been written one level up.
    let escaped_path = index_root.parent().unwrap().join(".sqry-cache");
    assert!(
        !escaped_path.exists(),
        "State should not escape to parent directory: {escaped_path:?}"
    );
}