1use ignore::WalkBuilder;
2use sha2::{Digest, Sha256};
3use std::fs::File;
4use std::io::Read;
5use std::path::{Path, PathBuf};
6
/// One indexed file: its location paired with a digest of its contents.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct FileHash {
    /// Location of the file. `DefaultIndexer` stores it relative to the
    /// workspace root whenever that prefix can be stripped.
    pub path: PathBuf,
    /// Content digest as text. `DefaultIndexer` stores the hex-encoded
    /// SHA-256 of the file's bytes.
    pub hash: String,
}
13
/// Tunables that control which files an indexing run visits and collects.
#[derive(Debug, Clone)]
pub struct IndexerOptions {
    /// Files whose size in bytes is greater than this value are skipped.
    pub max_file_size_bytes: u64,
    /// Cap on how many files a single run may index before it fails with
    /// `IndexerError::FileCountLimitExceeded`.
    pub max_file_count: usize,
    /// Whether the directory walk follows symbolic links. When `false`,
    /// symlink entries are not indexed.
    pub follow_symlinks: bool,
    /// Whether `.gitignore`, git exclude files, the global gitignore and
    /// `.ignore` files are honoured during the walk.
    pub respect_gitignore: bool,
    /// Name of an additional ignore file to honour in every directory, if any.
    pub custom_ignore_file: Option<String>,
}
23
24impl Default for IndexerOptions {
25 fn default() -> Self {
26 Self {
27 max_file_size_bytes: 500 * 1024,
28 max_file_count: 100000,
29 follow_symlinks: false,
30 respect_gitignore: true,
31 custom_ignore_file: Some(".codebonesignore".to_string()),
32 }
33 }
34}
35
/// Something that can scan a workspace and report a content hash per file.
pub trait Indexer {
    /// Indexes the workspace rooted at `workspace_root` according to `options`.
    ///
    /// On success returns one [`FileHash`] per indexed file; on failure
    /// returns the [`IndexerError`] that aborted the run.
    fn index(
        &self,
        workspace_root: &Path,
        options: &IndexerOptions,
    ) -> Result<Vec<FileHash>, IndexerError>;
}
45
/// Failures that can abort an indexing run.
#[derive(Debug, thiserror::Error)]
pub enum IndexerError {
    /// A visited path resolved to a location outside the workspace root.
    /// Carries the offending path.
    #[error("Path traversal detected: {0}")]
    PathTraversal(PathBuf),
    /// A symlink escaping the workspace root. Carries the offending path.
    /// NOTE(review): nothing in this file constructs this variant; escaping
    /// symlinks are currently reported as `PathTraversal` — confirm whether
    /// other code relies on it.
    #[error("Symlink escape detected: {0}")]
    SymlinkEscape(PathBuf),
    /// An underlying I/O operation failed; converted automatically via `?`.
    #[error("IO error: {0}")]
    Io(#[from] std::io::Error),
    /// The run collected more files than `IndexerOptions::max_file_count` allows.
    #[error("File count limit exceeded")]
    FileCountLimitExceeded,
}
58
/// Stateless [`Indexer`] that walks the filesystem and hashes file contents.
///
/// The type carries no data, so it derives the common value traits that
/// callers expect from a public type (`Debug`, `Clone`, `Copy`, `Default`).
#[derive(Debug, Clone, Copy, Default)]
pub struct DefaultIndexer;
60
61impl Indexer for DefaultIndexer {
62 fn index(
63 &self,
64 workspace_root: &Path,
65 options: &IndexerOptions,
66 ) -> Result<Vec<FileHash>, IndexerError> {
67 let mut results = Vec::new();
68 let mut count = 0;
69
70 let mut builder = WalkBuilder::new(workspace_root);
71 builder.follow_links(options.follow_symlinks);
72 builder.git_ignore(options.respect_gitignore);
73 builder.git_exclude(options.respect_gitignore);
74 builder.git_global(options.respect_gitignore);
75 builder.ignore(options.respect_gitignore);
76 builder.require_git(false);
77
78 if let Some(ref custom) = options.custom_ignore_file {
79 builder.add_custom_ignore_filename(custom);
80 }
81
82 let walker = builder.build();
83
84 let canonical_root = std::fs::canonicalize(workspace_root)?;
85
86 for result in walker {
87 let entry = match result {
88 Ok(e) => e,
89 Err(_) => continue,
90 };
91
92 let path = entry.path();
93 if path.is_dir() {
94 continue;
95 }
96
97 let canonical_path = match std::fs::canonicalize(path) {
99 Ok(p) => p,
100 Err(_) => continue, };
102 if !canonical_path.starts_with(&canonical_root) {
103 return Err(IndexerError::PathTraversal(path.to_path_buf()));
104 }
105
106 if entry.path_is_symlink() && !options.follow_symlinks {
112 continue; }
114
115 let file_name = path.file_name().unwrap_or_default().to_string_lossy();
117 if file_name == ".env"
118 || file_name.starts_with(".env.")
119 || file_name == ".envrc"
120 || file_name.ends_with(".pem")
121 || file_name.ends_with(".key")
122 || file_name.ends_with(".tfvars")
123 || file_name.ends_with(".p12")
124 || file_name.ends_with(".pfx")
125 || file_name.ends_with(".jks")
126 || file_name.starts_with("id_rsa")
127 || file_name.starts_with("id_ed25519")
128 || file_name == "id_ecdsa"
129 || file_name == "id_dsa"
130 || file_name == "id_ecdsa_sk"
131 || file_name == "id_xmss"
132 || file_name == "credentials.json"
133 || file_name.ends_with(".secrets")
134 || file_name.ends_with(".token")
135 || file_name == ".npmrc"
136 || file_name == ".netrc"
137 {
138 continue;
139 }
140
141 let ext = path
143 .extension()
144 .unwrap_or_default()
145 .to_string_lossy()
146 .to_lowercase();
147 if [
148 "exe", "dll", "so", "png", "jpg", "jpeg", "pdf", "db", "sqlite", "wasm",
149 ]
150 .contains(&ext.as_str())
151 {
152 continue;
153 }
154
155 let metadata = std::fs::metadata(path)?;
157 if metadata.len() > options.max_file_size_bytes {
158 continue;
159 }
160
161 let mut file = File::open(path)?;
163 let mut buffer = [0; 8192];
164 let bytes_read = file.read(&mut buffer)?;
165 let chunk = &buffer[..bytes_read];
166 if chunk.contains(&0) {
167 continue;
168 }
169 if chunk.windows(11).any(|w| w == b"-----BEGIN ") {
171 continue;
172 }
173
174 let mut hasher = Sha256::new();
176 let mut file = File::open(path)?;
177 std::io::copy(&mut file, &mut hasher)?;
178 let hash = hex::encode(hasher.finalize());
179
180 let rel_path = path
181 .strip_prefix(workspace_root)
182 .unwrap_or(path)
183 .to_path_buf();
184
185 results.push(FileHash {
186 path: rel_path,
187 hash,
188 });
189
190 count += 1;
191 if count >= options.max_file_count {
192 return Err(IndexerError::FileCountLimitExceeded);
193 }
194 }
195
196 Ok(results)
197 }
198}
199
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;
    use std::path::Path;
    use tempfile::TempDir;

    /// Creates a fresh, empty temporary workspace; it is deleted when the guard drops.
    fn setup_workspace() -> TempDir {
        TempDir::new().unwrap()
    }

    /// Indexes `root` with the default options, panicking on any indexer error.
    fn index_default(root: &Path) -> Vec<FileHash> {
        DefaultIndexer
            .index(root, &IndexerOptions::default())
            .unwrap()
    }

    /// Collects the path of every indexed file as a lossily converted string.
    fn names_of(results: &[FileHash]) -> Vec<String> {
        results
            .iter()
            .map(|r| r.path.to_string_lossy().to_string())
            .collect()
    }

    /// Returns `true` when `names` contains exactly `name`.
    fn has_name(names: &[String], name: &str) -> bool {
        names.iter().any(|n| n == name)
    }

    // Gated as a whole: without the symlink there is nothing to reject, so the
    // assertion could never hold on platforms where the link is not created.
    #[test]
    #[cfg(unix)]
    fn test_skips_symlinks_escaping_root() {
        let dir = setup_workspace();
        let root = dir.path();

        let out_dir = TempDir::new().unwrap();
        let out_file = out_dir.path().join("out.txt");
        fs::write(&out_file, "out").unwrap();
        std::os::unix::fs::symlink(&out_file, root.join("link")).unwrap();

        let options = IndexerOptions {
            follow_symlinks: true,
            ..Default::default()
        };

        let result = DefaultIndexer.index(root, &options);
        assert!(matches!(result, Err(IndexerError::PathTraversal(_))));
    }

    #[test]
    fn test_ignores_env_and_secret_files() {
        let dir = setup_workspace();
        let root = dir.path();
        for secret in [".env", "id_rsa", "config.pem"] {
            fs::write(root.join(secret), "secret").unwrap();
        }
        fs::write(root.join("normal.txt"), "normal").unwrap();

        let results = index_default(root);
        assert_eq!(results.len(), 1);
        assert_eq!(results[0].path, PathBuf::from("normal.txt"));
    }

    #[test]
    fn test_ignores_gitignore() {
        let dir = setup_workspace();
        let root = dir.path();
        fs::create_dir(root.join("ignored_dir")).unwrap();
        fs::write(root.join("ignored_dir/test.txt"), "ignored").unwrap();
        fs::write(root.join(".gitignore"), "ignored_dir/").unwrap();

        let results = index_default(root);
        assert!(results.iter().all(|r| !r.path.starts_with("ignored_dir")));
    }

    #[test]
    fn test_ignores_codebonesignore() {
        let dir = setup_workspace();
        let root = dir.path();
        fs::create_dir(root.join("drafts")).unwrap();
        fs::write(root.join("drafts/test.txt"), "ignored").unwrap();
        fs::write(root.join(".codebonesignore"), "drafts/").unwrap();

        let results = index_default(root);
        assert!(results.iter().all(|r| !r.path.starts_with("drafts")));
    }

    #[test]
    fn test_skips_large_files() {
        let dir = setup_workspace();
        let root = dir.path();
        fs::write(root.join("large.txt"), vec![b'a'; 600 * 1024]).unwrap();

        let options = IndexerOptions {
            max_file_size_bytes: 500 * 1024,
            ..Default::default()
        };
        let results = DefaultIndexer.index(root, &options).unwrap();
        assert!(results.is_empty());
    }

    #[test]
    fn test_skips_binary_extension() {
        let dir = setup_workspace();
        let root = dir.path();
        fs::write(root.join("test.exe"), "fake binary").unwrap();

        assert!(index_default(root).is_empty());
    }

    #[test]
    fn test_skips_binary_null_bytes() {
        let dir = setup_workspace();
        let root = dir.path();
        fs::write(root.join("fake.txt"), b"hello\0world").unwrap();

        assert!(index_default(root).is_empty());
    }

    // Despite the name, nothing is replaced: the indexer hashes raw bytes, so a
    // file with invalid UTF-8 (but no NUL byte) is simply indexed.
    #[test]
    fn test_replaces_invalid_utf8() {
        let dir = setup_workspace();
        let root = dir.path();
        fs::write(root.join("invalid.txt"), b"hello\xFFworld").unwrap();

        assert_eq!(index_default(root).len(), 1);
    }

    #[test]
    fn test_stops_at_file_count_limit() {
        let dir = setup_workspace();
        let root = dir.path();
        for i in 0..10 {
            fs::write(root.join(format!("{}.txt", i)), "test").unwrap();
        }

        let options = IndexerOptions {
            max_file_count: 5,
            ..Default::default()
        };
        let result = DefaultIndexer.index(root, &options);
        assert!(matches!(result, Err(IndexerError::FileCountLimitExceeded)));
    }

    #[test]
    fn test_generates_correct_hash() {
        let dir = setup_workspace();
        let root = dir.path();
        fs::write(root.join("test.txt"), "hello world").unwrap();

        let results = index_default(root);
        assert_eq!(results.len(), 1);
        assert_eq!(
            results[0].hash,
            "b94d27b9934d3e08a52e52d7da7dabfac484efe37a5380ee9088f7ace2efcde9"
        );
    }

    #[test]
    fn test_excludes_dotenv_file() {
        let dir = setup_workspace();
        let root = dir.path();
        fs::write(root.join(".env"), "SECRET=hunter2").unwrap();

        let names = names_of(&index_default(root));
        assert!(
            !has_name(&names, ".env"),
            ".env must be excluded, got: {:?}",
            names
        );
    }

    #[test]
    fn test_excludes_id_rsa_file() {
        let dir = setup_workspace();
        let root = dir.path();
        fs::write(root.join("id_rsa"), "-----BEGIN RSA PRIVATE KEY-----").unwrap();

        let names = names_of(&index_default(root));
        assert!(
            !has_name(&names, "id_rsa"),
            "id_rsa must be excluded, got: {:?}",
            names
        );
    }

    #[test]
    fn test_excludes_credentials_json_file() {
        let dir = setup_workspace();
        let root = dir.path();
        fs::write(root.join("credentials.json"), r#"{"token":"secret"}"#).unwrap();

        let names = names_of(&index_default(root));
        assert!(
            !has_name(&names, "credentials.json"),
            "credentials.json must be excluded, got: {:?}",
            names
        );
    }

    #[test]
    fn test_excludes_pem_header_file() {
        let dir = setup_workspace();
        let root = dir.path();
        fs::write(
            root.join("server.crt"),
            "-----BEGIN RSA PRIVATE KEY-----\nMIIEowIBAAKCAQEA...\n-----END RSA PRIVATE KEY-----\n",
        )
        .unwrap();
        fs::write(root.join("normal.txt"), "just text").unwrap();

        let names = names_of(&index_default(root));
        assert!(
            !has_name(&names, "server.crt"),
            "File with PEM header must be excluded, got: {:?}",
            names
        );
        assert!(
            has_name(&names, "normal.txt"),
            "normal.txt must still be indexed, got: {:?}",
            names
        );
    }

    #[test]
    fn test_normal_rs_file_is_not_excluded() {
        let dir = setup_workspace();
        let root = dir.path();
        fs::write(
            root.join("lib.rs"),
            "pub fn add(a: i32, b: i32) -> i32 { a + b }",
        )
        .unwrap();

        let names = names_of(&index_default(root));
        assert!(
            has_name(&names, "lib.rs"),
            "lib.rs must be indexed, got: {:?}",
            names
        );
    }

    #[test]
    fn test_excludes_exe_extension() {
        let dir = setup_workspace();
        let root = dir.path();
        fs::write(root.join("app.exe"), "MZ fake windows binary").unwrap();

        let names = names_of(&index_default(root));
        assert!(
            !names.iter().any(|n| n.ends_with(".exe")),
            ".exe must be excluded, got: {:?}",
            names
        );
    }

    #[test]
    fn test_excludes_png_extension() {
        let dir = setup_workspace();
        let root = dir.path();
        fs::write(root.join("logo.png"), b"\x89PNG\r\n\x1a\nfake image data").unwrap();

        let names = names_of(&index_default(root));
        assert!(
            !names.iter().any(|n| n.ends_with(".png")),
            ".png must be excluded, got: {:?}",
            names
        );
    }

    #[test]
    fn test_excludes_source_file_with_null_bytes() {
        let dir = setup_workspace();
        let root = dir.path();
        // Valid-looking source with a single NUL byte in the middle.
        let mut content = b"fn main() { println!(\"hello\"); }\n".to_vec();
        content.push(0x00);
        content.extend_from_slice(b" // more code");
        fs::write(root.join("tricky.rs"), &content).unwrap();

        let names = names_of(&index_default(root));
        assert!(
            !has_name(&names, "tricky.rs"),
            "Source file with null bytes must be excluded, got: {:?}",
            names
        );
    }

    #[test]
    fn test_codebonesignore_glob_excludes_toml_files() {
        let dir = setup_workspace();
        let root = dir.path();
        fs::write(root.join("Cargo.toml"), "[package]\nname = \"test\"").unwrap();
        fs::write(root.join("main.rs"), "fn main() {}").unwrap();
        fs::write(root.join(".codebonesignore"), "*.toml\n").unwrap();

        let names = names_of(&index_default(root));
        assert!(
            !names.iter().any(|n| n.ends_with(".toml")),
            "*.toml files must be excluded via .codebonesignore, got: {:?}",
            names
        );
        assert!(
            has_name(&names, "main.rs"),
            "main.rs must still be indexed, got: {:?}",
            names
        );
    }

    #[test]
    fn test_gitignore_glob_excludes_matching_files() {
        let dir = setup_workspace();
        let root = dir.path();
        fs::write(root.join("app.log"), "INFO: server started").unwrap();
        fs::write(root.join("server.rs"), "fn serve() {}").unwrap();
        fs::write(root.join(".gitignore"), "*.log\n").unwrap();

        let names = names_of(&index_default(root));
        assert!(
            !names.iter().any(|n| n.ends_with(".log")),
            "*.log files must be excluded via .gitignore, got: {:?}",
            names
        );
    }

    #[test]
    fn test_only_rs_files_indexed_when_all_others_ignored() {
        let dir = setup_workspace();
        let root = dir.path();
        fs::write(root.join("main.rs"), "fn main() {}").unwrap();
        fs::write(root.join("readme.md"), "# Project").unwrap();
        fs::write(root.join("config.yaml"), "key: value").unwrap();
        fs::write(root.join(".codebonesignore"), "*.md\n*.yaml\n").unwrap();

        let names = names_of(&index_default(root));
        for name in &names {
            assert!(
                name.ends_with(".rs"),
                "Only .rs files should be indexed, but found: {}",
                name
            );
        }
        assert!(has_name(&names, "main.rs"), "main.rs must be in results");
    }

    #[test]
    fn test_path_traversal_outside_root_is_rejected_or_absent() {
        let workspace = TempDir::new().unwrap();
        let outside = TempDir::new().unwrap();

        fs::write(workspace.path().join("inside.txt"), "safe content").unwrap();
        fs::write(outside.path().join("outside.txt"), "secret content").unwrap();

        // Phase 1: default options (symlinks not followed). The escaping link
        // must either abort the run or be absent from the results.
        #[cfg(unix)]
        {
            let link_path = workspace.path().join("escape_link");
            std::os::unix::fs::symlink(outside.path().join("outside.txt"), &link_path).unwrap();

            let result = DefaultIndexer.index(workspace.path(), &IndexerOptions::default());
            let files = match result {
                Ok(files) => files,
                Err(IndexerError::PathTraversal(_)) | Err(IndexerError::SymlinkEscape(_)) => {
                    Vec::new()
                }
                Err(e) => panic!("Unexpected error with follow_symlinks=false: {}", e),
            };

            for fh in &files {
                let absolute = workspace.path().join(&fh.path);
                assert!(
                    absolute.starts_with(workspace.path()),
                    "Traversal detected: {:?} is outside {:?}",
                    absolute,
                    workspace.path()
                );
                assert_ne!(
                    fh.path.to_string_lossy().as_ref(),
                    "escape_link",
                    "Symlink pointing outside root must not be indexed"
                );
            }
        }

        // Phase 2: symlinks followed. Either the run is rejected or every
        // returned path stays inside the workspace.
        #[cfg(unix)]
        {
            let link_path2 = workspace.path().join("escape_link2");
            std::os::unix::fs::symlink(outside.path().join("outside.txt"), &link_path2).unwrap();

            let options = IndexerOptions {
                follow_symlinks: true,
                ..Default::default()
            };
            match DefaultIndexer.index(workspace.path(), &options) {
                Err(IndexerError::PathTraversal(_)) | Err(IndexerError::SymlinkEscape(_)) => {}
                Ok(files) => {
                    for fh in &files {
                        let absolute = workspace.path().join(&fh.path);
                        assert!(
                            absolute.starts_with(workspace.path()),
                            "Returned file escapes workspace: {:?}",
                            absolute
                        );
                    }
                }
                Err(other) => panic!("Unexpected error: {}", other),
            }
        }
    }

    #[test]
    fn test_large_file_at_limit_is_indexed_small_file_over_limit_is_skipped() {
        let dir = setup_workspace();
        let root = dir.path();

        let max_size: u64 = 500 * 1024;
        fs::write(root.join("at_limit.txt"), vec![b'a'; max_size as usize]).unwrap();
        fs::write(
            root.join("over_limit.txt"),
            vec![b'b'; max_size as usize + 1],
        )
        .unwrap();

        let options = IndexerOptions {
            max_file_size_bytes: max_size,
            respect_gitignore: false,
            ..Default::default()
        };

        let names = names_of(&DefaultIndexer.index(root, &options).unwrap());

        assert!(
            has_name(&names, "at_limit.txt"),
            "File of exactly max_file_size_bytes should be indexed (boundary is exclusive); got: {:?}",
            names
        );
        assert!(
            !has_name(&names, "over_limit.txt"),
            "File of max_file_size_bytes + 1 should NOT be indexed; got: {:?}",
            names
        );
    }

    #[test]
    fn test_incremental_indexing_only_changed_file_has_new_hash() {
        use std::collections::HashMap;

        /// Maps each indexed path (as a string) to its hash.
        fn hashes_by_name(results: &[FileHash]) -> HashMap<String, String> {
            results
                .iter()
                .map(|fh| (fh.path.to_string_lossy().to_string(), fh.hash.clone()))
                .collect()
        }

        let dir = setup_workspace();
        let root = dir.path();

        fs::write(root.join("stable.rs"), "fn stable() {}").unwrap();
        fs::write(root.join("volatile.rs"), "fn original() {}").unwrap();

        let options = IndexerOptions {
            respect_gitignore: false,
            ..Default::default()
        };

        let first_hashes = hashes_by_name(&DefaultIndexer.index(root, &options).unwrap());
        assert!(
            first_hashes.contains_key("stable.rs"),
            "stable.rs must be in first index"
        );
        assert!(
            first_hashes.contains_key("volatile.rs"),
            "volatile.rs must be in first index"
        );

        fs::write(root.join("volatile.rs"), "fn modified() {}").unwrap();

        let second_hashes = hashes_by_name(&DefaultIndexer.index(root, &options).unwrap());

        assert_eq!(
            first_hashes["stable.rs"], second_hashes["stable.rs"],
            "stable.rs hash must not change between index passes"
        );
        assert_ne!(
            first_hashes["volatile.rs"], second_hashes["volatile.rs"],
            "volatile.rs hash must change after file modification"
        );
    }

    #[test]
    fn test_excludes_id_ecdsa_file() {
        let dir = setup_workspace();
        let root = dir.path();
        fs::write(root.join("id_ecdsa"), "-----BEGIN EC PRIVATE KEY-----").unwrap();

        let results = index_default(root);
        assert!(
            results.is_empty(),
            "id_ecdsa must be excluded, got: {:?}",
            results
        );
    }

    #[test]
    fn test_excludes_tfvars_file() {
        let dir = setup_workspace();
        let root = dir.path();
        fs::write(root.join("terraform.tfvars"), "db_password = \"secret\"").unwrap();

        let results = index_default(root);
        assert!(
            results.is_empty(),
            "terraform.tfvars must be excluded, got: {:?}",
            results
        );
    }

    #[test]
    fn test_excludes_p12_file() {
        let dir = setup_workspace();
        let root = dir.path();
        fs::write(root.join("keystore.p12"), b"fake pkcs12 binary bytes").unwrap();

        let results = index_default(root);
        assert!(
            results.is_empty(),
            "keystore.p12 must be excluded, got: {:?}",
            results
        );
    }

    #[test]
    fn test_indexes_crt_file_without_pem_header() {
        let dir = setup_workspace();
        let root = dir.path();
        fs::write(root.join("cert.crt"), "CERTIFICATE DATA WITHOUT PEM HEADER").unwrap();

        let names = names_of(&index_default(root));
        assert!(
            has_name(&names, "cert.crt"),
            "cert.crt without a PEM header must be indexed, got: {:?}",
            names
        );
    }

    #[test]
    fn test_excludes_crt_file_with_pem_header() {
        let dir = setup_workspace();
        let root = dir.path();
        fs::write(
            root.join("cert.crt"),
            "-----BEGIN CERTIFICATE-----\nMIIB...",
        )
        .unwrap();

        let names = names_of(&index_default(root));
        assert!(
            !has_name(&names, "cert.crt"),
            "cert.crt with a PEM header must be excluded, got: {:?}",
            names
        );
    }

    #[test]
    #[cfg(unix)]
    fn test_symlink_within_root_indexed_with_follow_symlinks() {
        let dir = setup_workspace();
        let root = dir.path();

        let real_file = root.join("real.rs");
        fs::write(&real_file, "fn real() {}").unwrap();
        std::os::unix::fs::symlink(&real_file, root.join("link_to_real.rs")).unwrap();

        let options = IndexerOptions {
            follow_symlinks: true,
            ..Default::default()
        };

        let result = DefaultIndexer.index(root, &options);
        assert!(
            result.is_ok(),
            "with follow_symlinks=true, a symlink inside the root must be indexed (not errored); got: {:?}",
            result
        );
        let names = names_of(&result.unwrap());
        assert!(
            has_name(&names, "link_to_real.rs"),
            "the within-root symlink must appear in indexed results; got: {:?}",
            names
        );
    }
}