1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
#![allow(clippy::unwrap_used)]
use codewalk::{CodeWalker, WalkConfig};
use std::fs;
use std::os::unix::fs::symlink;
/// Gap test: time-of-check/time-of-use race between `walk()` and `content()`.
///
/// The walk runs while `victim.txt` holds 5 bytes, under a `max_file_size` of 10. The file
/// is then replaced by a 100-byte one before its content is requested through the entry
/// obtained from the walk.
///
/// Accepted outcomes:
/// * `content()` returns an error (the read is refused), or
/// * `content()` returns the complete replacement file.
///
/// A successful read of any other length (for example one sized from metadata captured
/// during the walk) is reported as a finding.
#[test]
fn gap_test_toctou_large_file_replacement() {
    let dir = tempfile::tempdir().unwrap();
    let file_path = dir.path().join("victim.txt");
    fs::write(&file_path, "small").unwrap();

    let walker = CodeWalker::new(dir.path(), WalkConfig::default().max_file_size(10));
    let entries = walker.walk().unwrap();
    assert_eq!(entries.len(), 1);

    // GAP: between walk() and content(), an attacker swaps the file for a larger one.
    let replacement = "X".repeat(100);
    fs::write(&file_path, &replacement).unwrap();

    // An error is an acceptable answer to the swap, so it is not unwrapped; only a
    // successful read is checked for completeness.
    if let Ok(content) = entries[0].content() {
        assert_eq!(
            content.len(),
            replacement.len(),
            "FINDING: content() returned a partial read after the file was replaced"
        );
    }
}
/// Gap test: following symlinks must not let the walk escape the root directory.
///
/// Layout: `root/escape` is a symlink to a second temporary directory that contains
/// `secret.txt`. The walker is configured with `follow_symlinks(true)`.
///
/// As a gap test this asserts the behavior we WANT (no entry for `secret.txt`); a failure
/// of the assertion is a finding, not a test bug.
#[cfg(unix)]
#[test]
fn gap_test_symlink_escape() {
    let root = tempfile::tempdir().unwrap();
    let outside = tempfile::tempdir().unwrap();
    fs::write(outside.path().join("secret.txt"), "password").unwrap();

    let link_path = root.path().join("escape");
    symlink(outside.path(), &link_path).unwrap();

    let walker = CodeWalker::new(root.path(), WalkConfig::default().follow_symlinks(true));
    let entries = walker.walk().unwrap();

    // GAP: with symlink following enabled, the walker may descend through `escape` into a
    // directory outside `root`. A security scanner usually wants the walk bounded to
    // `root` to prevent arbitrary file reads.
    //
    // `Path::ends_with` compares whole path components, so this matches exactly the
    // entries whose final component is `secret.txt`, and it cannot panic on an entry
    // whose path has no final component.
    let found_secret = entries.iter().any(|e| e.path.ends_with("secret.txt"));

    assert!(
        !found_secret,
        "FINDING: Symlink allowed escape outside root directory"
    );
}
/// Gap test: reading a very large file through the chunked API.
///
/// A 1 GiB sparse file is created and walked with `max_file_size(0)` and
/// `skip_binary(false)`. `content()` is deliberately NOT called, because reading the whole
/// file into memory could exhaust the test runner. Instead the first ten items of
/// `content_chunks()` are pulled and each is required to be a successful read.
///
/// NOTE(review): presumably `max_file_size(0)` means "no limit" — confirm against the
/// `WalkConfig` documentation.
/// NOTE(review): the original author observed that `content()` caps its initial
/// `Vec::with_capacity` at `READ_CHUNK_SIZE * 4` but then keeps extending the buffer in a
/// loop — verify against the codewalk source.
#[test]
fn gap_test_oom_unbounded_read() {
    // 1 GiB; `set_len` on a fresh file extends it without writing data.
    const SPARSE_LEN: u64 = 1 << 30;

    let dir = tempfile::tempdir().unwrap();
    let sparse_path = dir.path().join("sparse.bin");
    // Keep the handle bound to a local so it stays open for the rest of the test.
    let sparse_file = fs::File::create(&sparse_path).unwrap();
    sparse_file.set_len(SPARSE_LEN).unwrap();

    let config = WalkConfig::default().max_file_size(0).skip_binary(false);
    let entries = CodeWalker::new(dir.path(), config).walk().unwrap();
    assert_eq!(entries.len(), 1);

    // Pull at most ten chunks, unwrapping each one so a read error fails the test.
    let chunks_read = entries[0]
        .content_chunks()
        .unwrap()
        .take(10)
        .fold(0, |seen, chunk| {
            let _ = chunk.unwrap();
            seen + 1
        });
    assert_eq!(chunks_read, 10);
}