1use ignore::WalkBuilder;
2use sha2::{Digest, Sha256};
3use std::fs::File;
4use std::io::Read;
5use std::path::{Path, PathBuf};
6
/// A file's location paired with a digest of its contents.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct FileHash {
    /// Path of the file. `DefaultIndexer` stores it relative to the workspace
    /// root whenever the walked path starts with that root.
    pub path: PathBuf,
    /// Content digest as text. `DefaultIndexer` fills in a hex-encoded SHA-256.
    pub hash: String,
}
13
/// Knobs that control which files an indexing run considers.
#[derive(Debug, Clone)]
pub struct IndexerOptions {
    /// Largest file size, in bytes, that is still indexed; bigger files are skipped.
    pub max_file_size_bytes: u64,
    /// Upper bound on the number of files in one index. `DefaultIndexer` aborts with
    /// `IndexerError::FileCountLimitExceeded` instead of returning a truncated result.
    pub max_file_count: usize,
    /// Forwarded to the directory walker's `follow_links`; when `false`,
    /// `DefaultIndexer` also skips entries that are themselves symlinks.
    pub follow_symlinks: bool,
    /// Enables the walker's `.gitignore`, git exclude, global gitignore and
    /// `.ignore` handling in one switch.
    pub respect_gitignore: bool,
    /// Name of an additional ignore file registered with the walker, if any.
    pub custom_ignore_file: Option<String>,
}

impl Default for IndexerOptions {
    /// Defaults: 500 KiB per file, 100 000 files, symlinks not followed, git
    /// ignore rules honoured, and `.codebonesignore` as the custom ignore file.
    fn default() -> Self {
        IndexerOptions {
            custom_ignore_file: Some(String::from(".codebonesignore")),
            respect_gitignore: true,
            follow_symlinks: false,
            max_file_count: 100_000,
            max_file_size_bytes: 500 * 1024,
        }
    }
}
35
36pub trait Indexer {
38 fn index(
40 &self,
41 workspace_root: &Path,
42 options: &IndexerOptions,
43 ) -> Result<Vec<FileHash>, IndexerError>;
44}
45
46#[derive(Debug, thiserror::Error)]
48pub enum IndexerError {
49 #[error("Path traversal detected: {0}")]
50 PathTraversal(PathBuf),
51 #[error("Symlink escape detected: {0}")]
52 SymlinkEscape(PathBuf),
53 #[error("IO error: {0}")]
54 Io(#[from] std::io::Error),
55 #[error("File count limit exceeded")]
56 FileCountLimitExceeded,
57}
58
/// Stateless, filesystem-backed [`Indexer`] implementation.
///
/// The type carries no data, so it is cheap to copy and can be created with
/// either `DefaultIndexer` or `DefaultIndexer::default()`.
#[derive(Debug, Clone, Copy, Default)]
pub struct DefaultIndexer;
60
61impl Indexer for DefaultIndexer {
62 fn index(
63 &self,
64 workspace_root: &Path,
65 options: &IndexerOptions,
66 ) -> Result<Vec<FileHash>, IndexerError> {
67 let mut results = Vec::new();
68 let mut count = 0;
69
70 let mut builder = WalkBuilder::new(workspace_root);
71 builder.follow_links(options.follow_symlinks);
72 builder.git_ignore(options.respect_gitignore);
73 builder.git_exclude(options.respect_gitignore);
74 builder.git_global(options.respect_gitignore);
75 builder.ignore(options.respect_gitignore);
76 builder.require_git(false);
77
78 if let Some(ref custom) = options.custom_ignore_file {
79 builder.add_custom_ignore_filename(custom);
80 }
81
82 let walker = builder.build();
83
84 let canonical_root = std::fs::canonicalize(workspace_root)?;
85
86 for result in walker {
87 let entry = match result {
88 Ok(e) => e,
89 Err(_) => continue,
90 };
91
92 let path = entry.path();
93 if path.is_dir() {
94 continue;
95 }
96
97 let canonical_path = match std::fs::canonicalize(path) {
99 Ok(p) => p,
100 Err(_) => continue, };
102 if !canonical_path.starts_with(&canonical_root) {
103 return Err(IndexerError::PathTraversal(path.to_path_buf()));
104 }
105
106 if entry.path_is_symlink() && !options.follow_symlinks {
112 continue; }
114
115 let file_name = path.file_name().unwrap_or_default().to_string_lossy();
117 if file_name == ".env"
118 || file_name.starts_with(".env.")
119 || file_name == ".envrc"
120 || file_name.ends_with(".pem")
121 || file_name.ends_with(".key")
122 || file_name.ends_with(".tfvars")
123 || file_name.ends_with(".p12")
124 || file_name.ends_with(".pfx")
125 || file_name.ends_with(".jks")
126 || file_name.starts_with("id_rsa")
127 || file_name.starts_with("id_ed25519")
128 || file_name == "id_ecdsa"
129 || file_name == "id_dsa"
130 || file_name == "id_ecdsa_sk"
131 || file_name == "id_xmss"
132 || file_name == "credentials.json"
133 || file_name.ends_with(".secrets")
134 || file_name.ends_with(".token")
135 || file_name == ".npmrc"
136 || file_name == ".netrc"
137 {
138 continue;
139 }
140
141 let ext = path
143 .extension()
144 .unwrap_or_default()
145 .to_string_lossy()
146 .to_lowercase();
147 if [
148 "exe", "dll", "so", "png", "jpg", "jpeg", "pdf", "db", "sqlite", "wasm",
149 ]
150 .contains(&ext.as_str())
151 {
152 continue;
153 }
154
155 let metadata = match std::fs::metadata(path) {
157 Ok(metadata) => metadata,
158 Err(error) if error.kind() == std::io::ErrorKind::PermissionDenied => continue,
159 Err(error) => return Err(error.into()),
160 };
161 if metadata.len() > options.max_file_size_bytes {
162 continue;
163 }
164
165 let mut file = match File::open(path) {
167 Ok(file) => file,
168 Err(error) if error.kind() == std::io::ErrorKind::PermissionDenied => continue,
169 Err(error) => return Err(error.into()),
170 };
171 let mut buffer = [0; 8192];
172 let bytes_read = match file.read(&mut buffer) {
173 Ok(bytes_read) => bytes_read,
174 Err(error) if error.kind() == std::io::ErrorKind::PermissionDenied => continue,
175 Err(error) => return Err(error.into()),
176 };
177 let chunk = &buffer[..bytes_read];
178 if chunk.contains(&0) {
179 continue;
180 }
181 if chunk.windows(11).any(|w| w == b"-----BEGIN ") {
183 continue;
184 }
185
186 let mut hasher = Sha256::new();
188 let mut file = match File::open(path) {
189 Ok(file) => file,
190 Err(error) if error.kind() == std::io::ErrorKind::PermissionDenied => continue,
191 Err(error) => return Err(error.into()),
192 };
193 match std::io::copy(&mut file, &mut hasher) {
194 Ok(_) => {}
195 Err(error) if error.kind() == std::io::ErrorKind::PermissionDenied => continue,
196 Err(error) => return Err(error.into()),
197 }
198 let hash = hex::encode(hasher.finalize());
199
200 let rel_path = path
201 .strip_prefix(workspace_root)
202 .unwrap_or(path)
203 .to_path_buf();
204
205 results.push(FileHash {
206 path: rel_path,
207 hash,
208 });
209
210 count += 1;
211 if count >= options.max_file_count {
212 return Err(IndexerError::FileCountLimitExceeded);
213 }
214 }
215
216 Ok(results)
217 }
218}
219
/// Integration-style tests that run `DefaultIndexer` against temporary directories.
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;
    use tempfile::TempDir;

    /// Creates an empty temporary directory that acts as the workspace root.
    fn setup_workspace() -> TempDir {
        TempDir::new().unwrap()
    }

    /// Indexes `root` with `options`, panicking if indexing fails.
    fn index_ok(root: &Path, options: &IndexerOptions) -> Vec<FileHash> {
        DefaultIndexer.index(root, options).unwrap()
    }

    /// Renders the path of every indexed file as a `String`.
    fn names_of(files: &[FileHash]) -> Vec<String> {
        files
            .iter()
            .map(|fh| fh.path.to_string_lossy().to_string())
            .collect()
    }

    /// Indexes `root` with the default options and returns the indexed paths.
    fn default_names(root: &Path) -> Vec<String> {
        names_of(&index_ok(root, &IndexerOptions::default()))
    }

    /// Options identical to the defaults except that symlinks are followed.
    fn following_symlinks() -> IndexerOptions {
        IndexerOptions {
            follow_symlinks: true,
            ..Default::default()
        }
    }

    // Gated as a whole: without the symlink there is nothing to detect, so the
    // assertion below would fail on targets where the link cannot be created.
    #[test]
    #[cfg(unix)]
    fn test_skips_symlinks_escaping_root() {
        let dir = setup_workspace();
        let root = dir.path();

        let out_dir = TempDir::new().unwrap();
        let out_file = out_dir.path().join("out.txt");
        fs::write(&out_file, "out").unwrap();
        std::os::unix::fs::symlink(&out_file, root.join("link")).unwrap();

        let result = DefaultIndexer.index(root, &following_symlinks());
        assert!(matches!(result, Err(IndexerError::PathTraversal(_))));
    }

    #[test]
    fn test_ignores_env_and_secret_files() {
        let dir = setup_workspace();
        let root = dir.path();
        fs::write(root.join(".env"), "secret").unwrap();
        fs::write(root.join("id_rsa"), "secret").unwrap();
        fs::write(root.join("config.pem"), "secret").unwrap();
        fs::write(root.join("normal.txt"), "normal").unwrap();

        let results = index_ok(root, &IndexerOptions::default());
        assert_eq!(results.len(), 1);
        assert_eq!(results[0].path, PathBuf::from("normal.txt"));
    }

    #[test]
    fn test_ignores_gitignore() {
        let dir = setup_workspace();
        let root = dir.path();
        fs::create_dir(root.join("ignored_dir")).unwrap();
        fs::write(root.join("ignored_dir/test.txt"), "ignored").unwrap();
        fs::write(root.join(".gitignore"), "ignored_dir/").unwrap();

        let results = index_ok(root, &IndexerOptions::default());
        assert!(results.iter().all(|r| !r.path.starts_with("ignored_dir")));
    }

    #[test]
    fn test_ignores_codebonesignore() {
        let dir = setup_workspace();
        let root = dir.path();
        fs::create_dir(root.join("drafts")).unwrap();
        fs::write(root.join("drafts/test.txt"), "ignored").unwrap();
        fs::write(root.join(".codebonesignore"), "drafts/").unwrap();

        let results = index_ok(root, &IndexerOptions::default());
        assert!(results.iter().all(|r| !r.path.starts_with("drafts")));
    }

    #[test]
    fn test_skips_large_files() {
        let dir = setup_workspace();
        let root = dir.path();
        fs::write(root.join("large.txt"), vec![b'a'; 600 * 1024]).unwrap();

        let options = IndexerOptions {
            max_file_size_bytes: 500 * 1024,
            ..Default::default()
        };
        assert!(index_ok(root, &options).is_empty());
    }

    #[test]
    fn test_skips_binary_extension() {
        let dir = setup_workspace();
        let root = dir.path();
        fs::write(root.join("test.exe"), "fake binary").unwrap();

        assert!(index_ok(root, &IndexerOptions::default()).is_empty());
    }

    #[test]
    fn test_skips_binary_null_bytes() {
        let dir = setup_workspace();
        let root = dir.path();
        fs::write(root.join("fake.txt"), b"hello\0world").unwrap();

        assert!(index_ok(root, &IndexerOptions::default()).is_empty());
    }

    // The indexer hashes raw bytes, so content that is not valid UTF-8 must
    // still be indexed rather than rejected.
    #[test]
    fn test_replaces_invalid_utf8() {
        let dir = setup_workspace();
        let root = dir.path();
        fs::write(root.join("invalid.txt"), b"hello\xFFworld").unwrap();

        assert_eq!(index_ok(root, &IndexerOptions::default()).len(), 1);
    }

    #[test]
    fn test_stops_at_file_count_limit() {
        let dir = setup_workspace();
        let root = dir.path();
        for i in 0..10 {
            fs::write(root.join(format!("{}.txt", i)), "test").unwrap();
        }

        let options = IndexerOptions {
            max_file_count: 5,
            ..Default::default()
        };
        let result = DefaultIndexer.index(root, &options);
        assert!(matches!(result, Err(IndexerError::FileCountLimitExceeded)));
    }

    #[test]
    fn test_generates_correct_hash() {
        let dir = setup_workspace();
        let root = dir.path();
        fs::write(root.join("test.txt"), "hello world").unwrap();

        let results = index_ok(root, &IndexerOptions::default());
        assert_eq!(results.len(), 1);
        assert_eq!(
            results[0].hash,
            "b94d27b9934d3e08a52e52d7da7dabfac484efe37a5380ee9088f7ace2efcde9"
        );
    }

    #[test]
    fn test_excludes_dotenv_file() {
        let dir = setup_workspace();
        let root = dir.path();
        fs::write(root.join(".env"), "SECRET=hunter2").unwrap();

        let names = default_names(root);
        assert!(
            !names.iter().any(|n| n == ".env"),
            ".env must be excluded, got: {:?}",
            names
        );
    }

    #[test]
    fn test_excludes_id_rsa_file() {
        let dir = setup_workspace();
        let root = dir.path();
        fs::write(root.join("id_rsa"), "-----BEGIN RSA PRIVATE KEY-----").unwrap();

        let names = default_names(root);
        assert!(
            !names.iter().any(|n| n == "id_rsa"),
            "id_rsa must be excluded, got: {:?}",
            names
        );
    }

    #[test]
    fn test_excludes_credentials_json_file() {
        let dir = setup_workspace();
        let root = dir.path();
        fs::write(root.join("credentials.json"), r#"{"token":"secret"}"#).unwrap();

        let names = default_names(root);
        assert!(
            !names.iter().any(|n| n == "credentials.json"),
            "credentials.json must be excluded, got: {:?}",
            names
        );
    }

    #[test]
    fn test_excludes_pem_header_file() {
        let dir = setup_workspace();
        let root = dir.path();
        fs::write(
            root.join("server.crt"),
            "-----BEGIN RSA PRIVATE KEY-----\nMIIEowIBAAKCAQEA...\n-----END RSA PRIVATE KEY-----\n",
        )
        .unwrap();
        fs::write(root.join("normal.txt"), "just text").unwrap();

        let names = default_names(root);
        assert!(
            !names.iter().any(|n| n == "server.crt"),
            "File with PEM header must be excluded, got: {:?}",
            names
        );
        assert!(
            names.iter().any(|n| n == "normal.txt"),
            "normal.txt must still be indexed, got: {:?}",
            names
        );
    }

    #[test]
    fn test_normal_rs_file_is_not_excluded() {
        let dir = setup_workspace();
        let root = dir.path();
        fs::write(
            root.join("lib.rs"),
            "pub fn add(a: i32, b: i32) -> i32 { a + b }",
        )
        .unwrap();

        let names = default_names(root);
        assert!(
            names.iter().any(|n| n == "lib.rs"),
            "lib.rs must be indexed, got: {:?}",
            names
        );
    }

    #[test]
    fn test_excludes_exe_extension() {
        let dir = setup_workspace();
        let root = dir.path();
        fs::write(root.join("app.exe"), "MZ fake windows binary").unwrap();

        let names = default_names(root);
        assert!(
            !names.iter().any(|n| n.ends_with(".exe")),
            ".exe must be excluded, got: {:?}",
            names
        );
    }

    #[test]
    fn test_excludes_png_extension() {
        let dir = setup_workspace();
        let root = dir.path();
        fs::write(root.join("logo.png"), b"\x89PNG\r\n\x1a\nfake image data").unwrap();

        let names = default_names(root);
        assert!(
            !names.iter().any(|n| n.ends_with(".png")),
            ".png must be excluded, got: {:?}",
            names
        );
    }

    #[test]
    fn test_excludes_source_file_with_null_bytes() {
        let dir = setup_workspace();
        let root = dir.path();
        let mut content = b"fn main() { println!(\"hello\"); }\n".to_vec();
        content.push(0x00);
        content.extend_from_slice(b" // more code");
        fs::write(root.join("tricky.rs"), &content).unwrap();

        let names = default_names(root);
        assert!(
            !names.iter().any(|n| n == "tricky.rs"),
            "Source file with null bytes must be excluded, got: {:?}",
            names
        );
    }

    #[test]
    fn test_codebonesignore_glob_excludes_toml_files() {
        let dir = setup_workspace();
        let root = dir.path();
        fs::write(root.join("Cargo.toml"), "[package]\nname = \"test\"").unwrap();
        fs::write(root.join("main.rs"), "fn main() {}").unwrap();
        fs::write(root.join(".codebonesignore"), "*.toml\n").unwrap();

        let names = default_names(root);
        assert!(
            !names.iter().any(|n| n.ends_with(".toml")),
            "*.toml files must be excluded via .codebonesignore, got: {:?}",
            names
        );
        assert!(
            names.iter().any(|n| n == "main.rs"),
            "main.rs must still be indexed, got: {:?}",
            names
        );
    }

    #[test]
    fn test_gitignore_glob_excludes_matching_files() {
        let dir = setup_workspace();
        let root = dir.path();
        fs::write(root.join("app.log"), "INFO: server started").unwrap();
        fs::write(root.join("server.rs"), "fn serve() {}").unwrap();
        fs::write(root.join(".gitignore"), "*.log\n").unwrap();

        let names = default_names(root);
        assert!(
            !names.iter().any(|n| n.ends_with(".log")),
            "*.log files must be excluded via .gitignore, got: {:?}",
            names
        );
    }

    #[test]
    fn test_only_rs_files_indexed_when_all_others_ignored() {
        let dir = setup_workspace();
        let root = dir.path();
        fs::write(root.join("main.rs"), "fn main() {}").unwrap();
        fs::write(root.join("readme.md"), "# Project").unwrap();
        fs::write(root.join("config.yaml"), "key: value").unwrap();
        fs::write(root.join(".codebonesignore"), "*.md\n*.yaml\n").unwrap();

        let names = default_names(root);
        for name in &names {
            assert!(
                name.ends_with(".rs"),
                "Only .rs files should be indexed, but found: {}",
                name
            );
        }
        assert!(
            names.iter().any(|n| n == "main.rs"),
            "main.rs must be in results"
        );
    }

    // Either outcome is acceptable for an escaping symlink: the run is rejected
    // with a traversal error, or the link is simply absent from the results.
    #[test]
    #[cfg(unix)]
    fn test_path_traversal_outside_root_is_rejected_or_absent() {
        let workspace = TempDir::new().unwrap();
        let outside = TempDir::new().unwrap();

        fs::write(workspace.path().join("inside.txt"), "safe content").unwrap();
        let secret = outside.path().join("outside.txt");
        fs::write(&secret, "secret content").unwrap();

        // Pass 1: symlinks are not followed (default options).
        std::os::unix::fs::symlink(&secret, workspace.path().join("escape_link")).unwrap();
        let files = match DefaultIndexer.index(workspace.path(), &IndexerOptions::default()) {
            Ok(files) => files,
            Err(IndexerError::PathTraversal(_)) | Err(IndexerError::SymlinkEscape(_)) => {
                Vec::new()
            }
            Err(e) => panic!("Unexpected error with follow_symlinks=false: {}", e),
        };
        for fh in &files {
            let absolute = workspace.path().join(&fh.path);
            assert!(
                absolute.starts_with(workspace.path()),
                "Traversal detected: {:?} is outside {:?}",
                absolute,
                workspace.path()
            );
            assert_ne!(
                fh.path.to_string_lossy().as_ref(),
                "escape_link",
                "Symlink pointing outside root must not be indexed"
            );
        }

        // Pass 2: symlinks are followed.
        std::os::unix::fs::symlink(&secret, workspace.path().join("escape_link2")).unwrap();
        match DefaultIndexer.index(workspace.path(), &following_symlinks()) {
            Err(IndexerError::PathTraversal(_)) | Err(IndexerError::SymlinkEscape(_)) => {}
            Ok(files) => {
                for fh in &files {
                    let absolute = workspace.path().join(&fh.path);
                    assert!(
                        absolute.starts_with(workspace.path()),
                        "Returned file escapes workspace: {:?}",
                        absolute
                    );
                }
            }
            Err(other) => panic!("Unexpected error: {}", other),
        }
    }

    #[test]
    fn test_large_file_at_limit_is_indexed_small_file_over_limit_is_skipped() {
        let dir = setup_workspace();
        let root = dir.path();

        let max_size: u64 = 500 * 1024;
        fs::write(root.join("at_limit.txt"), vec![b'a'; max_size as usize]).unwrap();
        fs::write(
            root.join("over_limit.txt"),
            vec![b'b'; max_size as usize + 1],
        )
        .unwrap();

        let options = IndexerOptions {
            max_file_size_bytes: max_size,
            respect_gitignore: false,
            ..Default::default()
        };
        let names = names_of(&index_ok(root, &options));

        assert!(
            names.iter().any(|n| n == "at_limit.txt"),
            "File of exactly max_file_size_bytes should be indexed (limit is inclusive); got: {:?}",
            names
        );
        assert!(
            !names.iter().any(|n| n == "over_limit.txt"),
            "File of max_file_size_bytes + 1 should NOT be indexed; got: {:?}",
            names
        );
    }

    #[test]
    fn test_incremental_indexing_only_changed_file_has_new_hash() {
        use std::collections::HashMap;

        /// Maps each indexed path to its hash.
        fn hashes_by_name(files: &[FileHash]) -> HashMap<String, String> {
            files
                .iter()
                .map(|fh| (fh.path.to_string_lossy().to_string(), fh.hash.clone()))
                .collect()
        }

        let dir = setup_workspace();
        let root = dir.path();

        fs::write(root.join("stable.rs"), "fn stable() {}").unwrap();
        fs::write(root.join("volatile.rs"), "fn original() {}").unwrap();

        let options = IndexerOptions {
            respect_gitignore: false,
            ..Default::default()
        };

        let first_hashes = hashes_by_name(&index_ok(root, &options));
        assert!(
            first_hashes.contains_key("stable.rs"),
            "stable.rs must be in first index"
        );
        assert!(
            first_hashes.contains_key("volatile.rs"),
            "volatile.rs must be in first index"
        );

        fs::write(root.join("volatile.rs"), "fn modified() {}").unwrap();

        let second_hashes = hashes_by_name(&index_ok(root, &options));
        assert_eq!(
            first_hashes["stable.rs"], second_hashes["stable.rs"],
            "stable.rs hash must not change between index passes"
        );
        assert_ne!(
            first_hashes["volatile.rs"], second_hashes["volatile.rs"],
            "volatile.rs hash must change after file modification"
        );
    }

    #[test]
    fn test_excludes_id_ecdsa_file() {
        let dir = setup_workspace();
        let root = dir.path();
        fs::write(root.join("id_ecdsa"), "-----BEGIN EC PRIVATE KEY-----").unwrap();

        let results = index_ok(root, &IndexerOptions::default());
        assert!(
            results.is_empty(),
            "id_ecdsa must be excluded, got: {:?}",
            results
        );
    }

    #[test]
    fn test_excludes_tfvars_file() {
        let dir = setup_workspace();
        let root = dir.path();
        fs::write(root.join("terraform.tfvars"), "db_password = \"secret\"").unwrap();

        let results = index_ok(root, &IndexerOptions::default());
        assert!(
            results.is_empty(),
            "terraform.tfvars must be excluded, got: {:?}",
            results
        );
    }

    #[test]
    fn test_excludes_p12_file() {
        let dir = setup_workspace();
        let root = dir.path();
        fs::write(root.join("keystore.p12"), b"fake pkcs12 binary bytes").unwrap();

        let results = index_ok(root, &IndexerOptions::default());
        assert!(
            results.is_empty(),
            "keystore.p12 must be excluded, got: {:?}",
            results
        );
    }

    #[test]
    fn test_indexes_crt_file_without_pem_header() {
        let dir = setup_workspace();
        let root = dir.path();
        fs::write(root.join("cert.crt"), "CERTIFICATE DATA WITHOUT PEM HEADER").unwrap();

        let names = default_names(root);
        assert!(
            names.iter().any(|n| n == "cert.crt"),
            "cert.crt without a PEM header must be indexed, got: {:?}",
            names
        );
    }

    #[test]
    fn test_excludes_crt_file_with_pem_header() {
        let dir = setup_workspace();
        let root = dir.path();
        fs::write(
            root.join("cert.crt"),
            "-----BEGIN CERTIFICATE-----\nMIIB...",
        )
        .unwrap();

        let names = default_names(root);
        assert!(
            !names.iter().any(|n| n == "cert.crt"),
            "cert.crt with a PEM header must be excluded, got: {:?}",
            names
        );
    }

    #[test]
    #[cfg(unix)]
    fn test_symlink_within_root_indexed_with_follow_symlinks() {
        let dir = setup_workspace();
        let root = dir.path();

        let real_file = root.join("real.rs");
        fs::write(&real_file, "fn real() {}").unwrap();
        std::os::unix::fs::symlink(&real_file, root.join("link_to_real.rs")).unwrap();

        let result = DefaultIndexer.index(root, &following_symlinks());
        assert!(
            result.is_ok(),
            "with follow_symlinks=true, a symlink inside the root must be indexed (not errored); got: {:?}",
            result
        );
        let names = names_of(&result.unwrap());
        assert!(
            names.iter().any(|n| n == "link_to_real.rs"),
            "the within-root symlink must appear in indexed results; got: {:?}",
            names
        );
    }
}