1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87
// struct SequentialSorter {}
//! This crate implements an external sort for arbitrary
//! iterators of any size, assuming they fit on disk.
//! ```
//! # #[cfg(not(miri))]
//! # fn main() -> Result<(), Box<dyn std::error::Error>> {
//! use extsort_iter::*;
//!
//! let sequence = [3,21,42,9,5];
//!
//! // the default configuration sorts with up to 10M buffered in memory
//! // and place the files under /tmp
//! //
//! // you will most likely want to change at least the location.
//! let config = ExtsortConfig::default_for::<i32>();
//!
//! let data = sequence
//! .iter()
//! .cloned()
//! .external_sort(config)?
//! .collect::<Vec<_>>();
//! assert_eq!(&data, &[3,5,9,21,42]);
//! # Ok(())
//! # }
//! # #[cfg(miri)]
//! # fn main() {}
//! ```
//!
//! ## When not to use this crate
//!
//! Do not use this crate when your source iterator is big because each item owns large
//! amounts of heap memory. That means the following case will result in memory exhaustion:
//! ```no_run
//! # use extsort_iter::*;
//! let data = "somestring".to_owned();
//! let iterator = std::iter::from_fn(|| Some(data.clone())).take(1_000_000);
//! let sorted = iterator.external_sort(ExtsortConfig::default_for::<String>());
//! ```
//!
//! The reason for that is that we are not dropping the values from the source iterator until they are
//! yielded by the result iterator.
//!
//! You can think of it as buffering the entire input iterator, with the values
//! themselves living on disk but all memory the values point to still living on the heap.
// NOTE(review): the two attributes below are *outer* attributes (`#[...]`), so they
// attach only to the `extension_trait` module declared right after them, not to the
// whole crate. If crate-wide linting was intended they would have to be inner
// attributes (`#![...]`) — confirm the intent before changing.
#[warn(clippy::all, clippy::pedantic)]
#[allow(clippy::module_name_repetitions)]
pub mod extension_trait;
// Private modules; only the items re-exported below are reachable from outside.
mod orderer;
mod run;
mod sorter;
// Re-exports that make `use extsort_iter::*;` (as in the crate-level example) bring
// the extension trait(s) and `ExtsortConfig` into scope.
pub use extension_trait::*;
pub use sorter::ExtsortConfig;
// Not compiled under miri, mirroring the gating of the crate-level doc example.
#[cfg(not(miri))]
#[cfg(test)]
mod tests {
    use std::{env, num::NonZeroUsize};

    use crate::{extension_trait::ExtSortOrdExtension, sorter::ExtsortConfig};

    /// End-to-end check: externally sorting a shuffled sequence must yield exactly
    /// the same elements in ascending order.
    #[test]
    fn integration() {
        let sequence = [
            2, 82, 29, 86, 100, 67, 44, 19, 25, 10, 84, 47, 65, 42, 11, 24, 53, 92, 69, 49, 70, 36,
            8, 48, 16, 91, 62, 58, 55, 18, 27, 79, 76, 40, 22, 95, 99, 28, 17, 7, 59, 30, 97, 80,
            34, 33, 54, 45, 31, 52, 56, 1, 57, 38, 61, 6, 23, 94, 85, 51, 35, 68, 41, 15, 90, 14,
            74, 75, 32, 73, 83, 64, 77, 89, 4, 72, 71, 21, 63, 5, 39, 12, 20, 3, 43, 88, 26, 78,
            93, 60, 50, 13, 37, 87, 46, 96, 66, 98, 81, 9,
        ];

        // Deliberately tiny buffer sizes — presumably so that the input does not fit
        // into a single in-memory run; confirm against `ExtsortConfig`'s field docs.
        let config = ExtsortConfig {
            sort_buffer_size: NonZeroUsize::new(10).expect("10 is non-zero"),
            run_read_size: NonZeroUsize::new(2).expect("2 is non-zero"),
            // Use the platform's temp directory instead of a hard-coded "/tmp" so the
            // test also works where that path does not exist.
            temp_file_folder: env::temp_dir(),
        };

        let sorted: Vec<i32> = sequence
            .iter()
            .external_sort(config)
            .expect("external sort should succeed")
            .copied()
            .collect();

        // Compare against the standard library's sort of the same input. Unlike a
        // `zip(..).all(..)` check, this also fails when elements are missing, because
        // the lengths are compared as well.
        let mut expected = sequence.to_vec();
        expected.sort_unstable();
        assert_eq!(sorted, expected);
    }
}