finalfrontier_utils/
data.rs1use std::fs::File;
2
3use failure::{Error, ResultExt};
4use memmap::{Mmap, MmapOptions};
5
6pub fn thread_data_text(f: &File, thread: usize, n_threads: usize) -> Result<(Mmap, usize), Error> {
12 assert!(
13 thread < n_threads,
14 "Thread {} out of index [0, {})",
15 thread,
16 n_threads
17 );
18
19 let size = f.metadata().context("Cannot get file metadata")?.len();
20 let chunk_size = size as usize / n_threads;
21
22 let mmap = unsafe { MmapOptions::new().map(&f)? };
23
24 if thread == 0 {
25 return Ok((mmap, 0));
26 }
27
28 let mut start = thread * chunk_size;
29 while start < mmap.len() {
30 let next = mmap[start];
31 start += 1;
32 if next == b'\n' {
33 break;
34 }
35 }
36
37 Ok((mmap, start))
38}
39
40pub fn thread_data_conllx(
46 f: &File,
47 thread: usize,
48 n_threads: usize,
49) -> Result<(Mmap, usize), Error> {
50 assert!(
51 thread < n_threads,
52 "Thread {} out of index [0, {})",
53 thread,
54 n_threads
55 );
56
57 let size = f.metadata().context("Cannot get file metadata")?.len();
58 let chunk_size = size as usize / n_threads;
59
60 let mmap = unsafe { MmapOptions::new().map(&f)? };
61
62 if thread == 0 {
63 return Ok((mmap, 0));
64 }
65
66 let mut start = thread * chunk_size;
67 while start < mmap.len() - 1 {
68 let next = mmap[start];
69 start += 1;
70 if next == b'\n' && mmap[start] == b'\n' {
71 start += 1;
72 break;
73 }
74 }
75
76 Ok((mmap, start))
77}
78
#[cfg(test)]
mod tests {
    use std::fs::File;

    use super::{thread_data_conllx, thread_data_text};

    // Expected contents of `testdata/chunking.txt` (52 bytes).
    static CHUNKING_TEST_DATA: &str =
        "a b c\nd e f\ng h i\nj k l\nm n o\np q r\ns t u\nv w x\ny z\n";

    // Expected contents of `testdata/dep_chunking.txt` (55 bytes).
    static CHUNKING_TEST_DATA_DEPS: &str =
        "a b c\nd e f\n\ng h i\nj k l\n\nm n o\np q r\n\ns t u\nv w x\ny z\n";

    #[test]
    fn thread_data_test() {
        let f = File::open("testdata/chunking.txt").unwrap();

        // Expected start offset for threads 0, 1 and 2 of 3: each one is the
        // first byte after the first newline at or after the chunk boundary.
        for (thread, &expected_start) in [0usize, 18, 36].iter().enumerate() {
            let (mmap, start) = thread_data_text(&f, thread, 3).unwrap();
            assert_eq!(
                &*mmap,
                CHUNKING_TEST_DATA.as_bytes(),
                "Memory mapping is incorrect"
            );
            assert_eq!(start, expected_start, "Incorrect start index");
        }
    }

    #[test]
    fn deps_thread_data_test() {
        let f = File::open("testdata/dep_chunking.txt").unwrap();

        // Expected start offset for threads 0, 1 and 2 of 3: each one is the
        // first byte after the first empty line at or after the chunk
        // boundary.
        for (thread, &expected_start) in [0usize, 26, 39].iter().enumerate() {
            let (mmap, start) = thread_data_conllx(&f, thread, 3).unwrap();
            assert_eq!(
                &*mmap,
                CHUNKING_TEST_DATA_DEPS.as_bytes(),
                "Memory mapping is incorrect"
            );
            assert_eq!(start, expected_start, "Incorrect start index");
        }
    }

    // `expected` pins the panic to the thread-index assertion, so the test
    // cannot pass merely because the fixture file failed to open.
    #[should_panic(expected = "out of index")]
    #[test]
    fn thread_data_out_of_bounds_test() {
        let f = File::open("testdata/chunking.txt").unwrap();
        let _ = thread_data_conllx(&f, 3, 3).unwrap();
    }

    // Same precondition check for the plain-text variant.
    #[should_panic(expected = "out of index")]
    #[test]
    fn thread_data_text_out_of_bounds_test() {
        let f = File::open("testdata/chunking.txt").unwrap();
        let _ = thread_data_text(&f, 3, 3).unwrap();
    }
}