1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
use std::fs::File;
use failure::{Error, ResultExt};
use memmap::{Mmap, MmapOptions};
/// Memory-maps `f` and computes the byte offset at which `thread` should begin.
///
/// The file length reported by its metadata is divided (integer division) by
/// `n_threads` to obtain a chunk size. Thread `0` always begins at offset `0`.
/// Any other thread begins at `thread * chunk_size`, moved forward to the byte
/// immediately after the first `\n` found at or after that position. If no
/// newline is found, the returned offset is the length of the mapping.
///
/// Returns the mapping of the whole file together with the start offset.
///
/// # Errors
///
/// Returns an error if the file metadata cannot be read or if the file cannot
/// be memory-mapped.
///
/// # Panics
///
/// Panics if `thread >= n_threads`.
pub fn thread_data(f: &File, thread: usize, n_threads: usize) -> Result<(Mmap, usize), Error> {
    assert!(
        thread < n_threads,
        "Thread {} out of index [0, {})",
        thread,
        n_threads
    );

    let file_len = f.metadata().context("Cannot get file metadata")?.len();
    let chunk_size = file_len as usize / n_threads;

    // SAFETY: NOTE(review) — a file-backed mapping presumably requires that the
    // underlying file is not truncated or modified while the map is alive;
    // nothing in this function enforces that, so confirm with the callers.
    let mmap = unsafe { MmapOptions::new().map(f)? };

    let start = match thread {
        // The first thread reads from the very beginning of the file.
        0 => 0,
        _ => {
            let nominal = thread * chunk_size;
            // Skip to the nominal chunk boundary, then look for the newline
            // that terminates the line the boundary falls into.
            match mmap.iter().skip(nominal).position(|&byte| byte == b'\n') {
                // Start on the byte right after that newline.
                Some(offset) => nominal + offset + 1,
                // No newline left: the scan ran to the end of the mapping
                // (or never started if `nominal` was already past it).
                None => nominal.max(mmap.len()),
            }
        }
    };

    Ok((mmap, start))
}
#[cfg(test)]
mod tests {
    use std::fs::File;

    use super::thread_data;

    /// Contents the tests expect to find in `testdata/chunking.txt`.
    static CHUNKING_TEST_DATA: &str =
        "a b c\nd e f\ng h i\nj k l\nm n o\np q r\ns t u\nv w x\ny z\n";

    /// Checks, for each of three threads, that the whole file is mapped and
    /// that the reported start offset matches the expected value.
    #[test]
    fn thread_data_test() {
        let file = File::open("testdata/chunking.txt").unwrap();

        // Expected start offset for threads 0, 1 and 2 respectively.
        let expected_starts: [usize; 3] = [0, 18, 36];

        for (thread, &expected) in expected_starts.iter().enumerate() {
            let (mapped, offset) = thread_data(&file, thread, 3).unwrap();
            assert_eq!(
                &*mapped,
                CHUNKING_TEST_DATA.as_bytes(),
                "Memory mapping is incorrect"
            );
            assert_eq!(offset, expected, "Incorrect start index");
        }
    }

    /// A thread index equal to the thread count must trigger a panic.
    #[test]
    #[should_panic]
    fn thread_data_out_of_bounds_test() {
        let file = File::open("testdata/chunking.txt").unwrap();
        let _ = thread_data(&file, 3, 3).unwrap();
    }
}