1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
use std::fs::{File, OpenOptions};
use std::io::{BufReader, BufWriter, self};
use std::marker::PhantomData;
use std::path::Path;
use tempdir::TempDir;
mod iter;
pub use iter::{ExtSortedIterator, Sortable};
/// External sorter for items of type `T`.
///
/// Input is consumed in chunks of at most `buffer_n_items` items; each chunk
/// is sorted in memory and written to its own file inside `tmp_dir`. Readers
/// over those chunk files are then handed to [`ExtSortedIterator`].
pub struct ExtSorter<T> {
/// Number of items collected in memory before a chunk is sorted and
/// written out to a file.
buffer_n_items: usize,
/// Temporary directory that holds the chunk files (named `0`, `1`, ...).
tmp_dir: TempDir,
/// Ties the sorter to the item type `T` without storing a `T`.
phantom: PhantomData<T>,
}
impl<T> ExtSorter<T>
where
    T: Sortable<BufWriter<File>, BufReader<File>>,
    T::Error: From<io::Error>,
{
    /// Creates a sorter that keeps at most `buffer_n_items` items in memory
    /// per chunk.
    ///
    /// Chunk files are stored in a fresh temporary directory created with
    /// `TempDir::new` using the prefix `extsort_lily`.
    ///
    /// # Errors
    ///
    /// Returns the `io::Error` reported when the temporary directory cannot
    /// be created.
    pub fn new(buffer_n_items: usize) -> io::Result<Self> {
        Ok(ExtSorter {
            buffer_n_items,
            tmp_dir: TempDir::new("extsort_lily")?,
            phantom: PhantomData,
        })
    }

    /// Same as [`ExtSorter::new`], but the temporary directory (prefix
    /// `extsort_lily`) is created inside `tmp_dir` via `TempDir::new_in`.
    ///
    /// # Errors
    ///
    /// Returns the `io::Error` reported when the temporary directory cannot
    /// be created.
    pub fn new_in<P: AsRef<Path>>(buffer_n_items: usize, tmp_dir: P) -> io::Result<Self> {
        Ok(ExtSorter {
            buffer_n_items,
            tmp_dir: TempDir::new_in(tmp_dir, "extsort_lily")?,
            phantom: PhantomData,
        })
    }

    /// Sorts the items yielded by `unsorted` using on-disk chunk files.
    ///
    /// Items are buffered until `buffer_n_items` of them have been collected;
    /// the buffer is then sorted with `sort_unstable` and serialized to a
    /// file named after the chunk index (`0`, `1`, ...) in the sorter's
    /// temporary directory. A trailing partial chunk is written the same way.
    /// Finally every chunk file is reopened for reading and the readers are
    /// passed to `ExtSortedIterator::new`.
    ///
    /// # Errors
    ///
    /// Returns `T::Error` if a chunk cannot be written (I/O errors are
    /// converted through `From<io::Error>`), if a chunk file cannot be
    /// reopened, or if `ExtSortedIterator::new` fails.
    pub fn sort<I>(
        &self,
        unsorted: I,
    ) -> Result<iter::ExtSortedIterator<T, BufReader<File>, BufWriter<File>>, T::Error>
    where
        I: Iterator<Item = T>,
    {
        let mut chunk_count: usize = 0;
        {
            // Scoped so the in-memory chunk buffer is dropped before the
            // chunk files are reopened for reading.
            let mut chunk = Vec::new();
            for seq in unsorted {
                chunk.push(seq);
                if chunk.len() >= self.buffer_n_items {
                    chunk.sort_unstable();
                    self.write_chunk(
                        &self.tmp_dir.path().join(chunk_count.to_string()),
                        &mut chunk,
                    )?;
                    // Keep the allocation and reuse it for the next chunk.
                    chunk.clear();
                    chunk_count += 1;
                }
            }
            // Spill whatever is left over once the input is exhausted.
            if !chunk.is_empty() {
                chunk.sort_unstable();
                self.write_chunk(
                    &self.tmp_dir.path().join(chunk_count.to_string()),
                    &mut chunk,
                )?;
                chunk_count += 1;
            }
        }

        let readers = (0..chunk_count)
            .map(|i| File::open(self.tmp_dir.path().join(i.to_string())).map(BufReader::new))
            .collect::<Result<Vec<_>, _>>()?;
        iter::ExtSortedIterator::new(readers)
    }

    /// Serializes every item of `chunk`, in order, into `file`.
    ///
    /// The file is created if missing and truncated if it already exists.
    ///
    /// # Errors
    ///
    /// Returns `T::Error` if the file cannot be opened, if serializing an
    /// item fails, or if flushing the buffered data to the file fails.
    fn write_chunk(&self, file: &Path, chunk: &mut Vec<T>) -> Result<(), T::Error> {
        let new_file = OpenOptions::new()
            .create(true)
            .write(true)
            .truncate(true)
            .open(file)?;
        let mut w = BufWriter::new(new_file);
        for s in chunk {
            s.serialize(&mut w)?;
        }
        // Flush explicitly: dropping a `BufWriter` ignores flush errors,
        // which would silently leave a truncated chunk file behind.
        io::Write::flush(&mut w)?;
        Ok(())
    }
}