Skip to main content

dev_fixtures/
adversarial.rs

//! Adversarial input generators.
//!
//! These generators produce inputs that exercise failure paths:
//! oversized files, malformed UTF-8, seeded pseudo-random bytes, and
//! unusual filenames. Every generator is deterministic; those that
//! take a seed produce the same output for the same seed.
//!
//! ## Use cases
//!
//! - **Oversized**: confirm that buffer-allocation paths handle
//!   files larger than common assumptions.
//! - **Malformed UTF-8**: confirm that decoders return errors instead
//!   of panicking on invalid bytes.
//! - **Random bytes**: reproducible arbitrary binary content derived
//!   from a seed.
//! - **Unusual names**: confirm that path-handling code copes with
//!   Unicode, emoji, very long names, etc.
15
16use std::fs;
17use std::io;
18use std::path::Path;
19
/// Write exactly `size_bytes` zero bytes to `path`, creating the file or
/// truncating an existing one. The parent directory MUST exist.
///
/// The zeroes are streamed through [`io::copy`] from a bounded
/// [`io::repeat`] source, so memory use does not grow with `size_bytes`
/// and the requested length is never narrowed to `usize`. Every byte is
/// explicitly written; use [`oversized_sparse`] when only the reported
/// file length matters.
///
/// # Errors
///
/// Returns the underlying I/O error if the file cannot be created or if
/// writing to it fails.
///
/// # Example
///
/// ```
/// use dev_fixtures::adversarial::oversized_zeros;
/// let dir = tempfile::tempdir().unwrap();
/// let path = dir.path().join("big.bin");
/// oversized_zeros(&path, 4096).unwrap();
/// assert_eq!(std::fs::metadata(&path).unwrap().len(), 4096);
/// ```
pub fn oversized_zeros(path: &Path, size_bytes: u64) -> io::Result<()> {
    let mut file = fs::File::create(path)?;
    // Keep the length as `u64`: an `as usize` cast would silently truncate
    // on 32-bit targets, and a single buffer would need `size_bytes` of RAM.
    // The fully qualified call avoids needing `io::Read` imported.
    let mut zeros = io::Read::take(io::repeat(0), size_bytes);
    io::copy(&mut zeros, &mut file)?;
    Ok(())
}
39
/// Create (or truncate) the file at `path` and set its length to
/// `size_bytes` via `set_len`, without writing any data.
///
/// On most platforms this yields a sparse file: the reported size is
/// `size_bytes`, but no disk space is consumed until the file is
/// written to. That makes it handy for testing very large file handling
/// cheaply.
///
/// # Example
///
/// ```
/// use dev_fixtures::adversarial::oversized_sparse;
/// let dir = tempfile::tempdir().unwrap();
/// let path = dir.path().join("sparse.bin");
/// oversized_sparse(&path, 1_000_000).unwrap();
/// assert_eq!(std::fs::metadata(&path).unwrap().len(), 1_000_000);
/// ```
pub fn oversized_sparse(path: &Path, size_bytes: u64) -> io::Result<()> {
    // `File::create` opens write-only, creating the file if it is missing
    // and truncating it if it already exists.
    let file = fs::File::create(path)?;
    file.set_len(size_bytes)
}
65
/// Write a file to `path` whose contents are deliberately not valid
/// UTF-8.
///
/// The payload is an ASCII line, then the four bytes
/// `0xFF, 0xFE, 0xFD, 0xFC` (none of which is a valid UTF-8 lead byte),
/// then another ASCII line.
///
/// # Example
///
/// ```
/// use dev_fixtures::adversarial::malformed_utf8;
/// let dir = tempfile::tempdir().unwrap();
/// let path = dir.path().join("bad.txt");
/// malformed_utf8(&path).unwrap();
/// let bytes = std::fs::read(&path).unwrap();
/// assert!(std::str::from_utf8(&bytes).is_err());
/// ```
pub fn malformed_utf8(path: &Path) -> io::Result<()> {
    const PREFIX: &[u8] = b"valid ascii prefix\n";
    const INVALID: &[u8] = &[0xFF, 0xFE, 0xFD, 0xFC];
    const SUFFIX: &[u8] = b"more after the bad bytes\n";

    // Valid text surrounds the bad bytes so the decode error lands mid-file.
    let payload = [PREFIX, INVALID, SUFFIX].concat();
    fs::write(path, payload)
}
87
/// Write `n` pseudo-random bytes to `path`, generated by a splitmix64
/// stream whose initial state is `seed`.
///
/// Each splitmix64 output contributes its eight little-endian bytes in
/// order; the final word is cut short when `n` is not a multiple of 8.
/// The same `(n, seed)` pair always produces the same file contents.
///
/// # Example
///
/// ```
/// use dev_fixtures::adversarial::random_bytes;
/// let dir = tempfile::tempdir().unwrap();
/// let path = dir.path().join("rand.bin");
/// random_bytes(&path, 32, 42).unwrap();
/// let a = std::fs::read(&path).unwrap();
/// random_bytes(&path, 32, 42).unwrap();
/// let b = std::fs::read(&path).unwrap();
/// assert_eq!(a, b); // deterministic from seed
/// ```
pub fn random_bytes(path: &Path, n: usize, seed: u64) -> io::Result<()> {
    let mut state = seed;
    // Endless stream of splitmix64 outputs, each as 8 little-endian bytes.
    let words = std::iter::repeat_with(move || {
        state = state.wrapping_add(0x9E37_79B9_7F4A_7C15);
        let mixed = (state ^ (state >> 30)).wrapping_mul(0xBF58_476D_1CE4_E5B9);
        let mixed = (mixed ^ (mixed >> 27)).wrapping_mul(0x94D0_49BB_1331_11EB);
        (mixed ^ (mixed >> 31)).to_le_bytes()
    });
    // Flatten words into bytes and stop after exactly `n` of them.
    let payload: Vec<u8> = words.flatten().take(n).collect();
    fs::write(path, payload)
}
122
/// File names that are valid on most filesystems yet poke at edge
/// cases: a leading dot, spaces, non-ASCII scripts, emoji, a very long
/// name, a trailing dot, and a name with no extension.
///
/// Returns the first `count` names from a fixed pool of 10, in a stable
/// order; asking for more than the pool holds returns the whole pool.
///
/// # Example
///
/// ```
/// use dev_fixtures::adversarial::unusual_names;
/// let names = unusual_names(5);
/// assert_eq!(names.len(), 5);
/// ```
pub fn unusual_names(count: usize) -> Vec<String> {
    // The only computed entry: "long_name_" followed by 120 `x` characters.
    let long_name = format!("{}{}", "long_name_", "x".repeat(120));
    let candidates: [&str; 10] = [
        ".hidden_file",
        "with space.txt",
        "with-many-dashes-in-name.txt",
        "résumé.tex",
        "файл.txt",
        "ファイル.txt",
        "emoji-✅-name.txt",
        long_name.as_str(),
        "trailing.dot.",
        "no_extension",
    ];
    candidates
        .iter()
        .take(count)
        .map(|&name| name.to_owned())
        .collect()
}
150
#[cfg(test)]
mod tests {
    use super::*;

    /// The written file has the requested length and contains only zeroes.
    #[test]
    fn oversized_zeros_writes_exact_size() {
        let dir = tempfile::tempdir().unwrap();
        let path = dir.path().join("big.bin");
        oversized_zeros(&path, 1024).unwrap();
        let bytes = fs::read(&path).unwrap();
        assert_eq!(bytes.len(), 1024);
        assert!(bytes.iter().all(|&b| b == 0));
    }

    /// File metadata reports exactly the requested length.
    #[test]
    fn oversized_sparse_reports_exact_size() {
        let dir = tempfile::tempdir().unwrap();
        let path = dir.path().join("sparse.bin");
        oversized_sparse(&path, 4096).unwrap();
        let meta = fs::metadata(&path).unwrap();
        assert_eq!(meta.len(), 4096);
    }

    /// The generated file is rejected by the standard UTF-8 validator.
    #[test]
    fn malformed_utf8_is_not_valid_utf8() {
        let dir = tempfile::tempdir().unwrap();
        let path = dir.path().join("bad.txt");
        malformed_utf8(&path).unwrap();
        let bytes = fs::read(&path).unwrap();
        assert!(std::str::from_utf8(&bytes).is_err());
    }

    /// Two runs with the same seed produce byte-identical output.
    #[test]
    fn random_bytes_are_deterministic_from_seed() {
        let dir = tempfile::tempdir().unwrap();
        let path = dir.path().join("rand.bin");
        random_bytes(&path, 64, 7).unwrap();
        let a = fs::read(&path).unwrap();
        random_bytes(&path, 64, 7).unwrap();
        let b = fs::read(&path).unwrap();
        assert_eq!(a, b);
        assert_eq!(a.len(), 64);
    }

    /// Different seeds produce different output.
    #[test]
    fn random_bytes_differ_with_seed() {
        let dir = tempfile::tempdir().unwrap();
        let path_a = dir.path().join("a.bin");
        let path_b = dir.path().join("b.bin");
        random_bytes(&path_a, 64, 1).unwrap();
        random_bytes(&path_b, 64, 2).unwrap();
        let a = fs::read(&path_a).unwrap();
        let b = fs::read(&path_b).unwrap();
        assert_ne!(a, b);
    }

    /// Lengths that are not a multiple of the 8-byte word size, and zero,
    /// are honoured exactly.
    #[test]
    fn random_bytes_honours_partial_word_lengths() {
        let dir = tempfile::tempdir().unwrap();
        let path = dir.path().join("partial.bin");
        for &n in &[0usize, 1, 7, 13] {
            random_bytes(&path, n, 3).unwrap();
            assert_eq!(fs::read(&path).unwrap().len(), n);
        }
    }

    /// Requests are served exactly up to the pool size and capped beyond it.
    #[test]
    fn unusual_names_returns_requested_count() {
        let names = unusual_names(3);
        assert_eq!(names.len(), 3);
        // Pool is finite: an oversized request yields the whole pool. An
        // exact count is asserted because `<= 10` would also accept an
        // empty result.
        let big = unusual_names(100);
        assert_eq!(big.len(), 10);
        assert!(unusual_names(0).is_empty());
    }
}