mule/
dataset_batch.rs

1use crate::dataset_file::DatasetFile;
2use crate::errors::Result;
3use futures_core::stream::TryStream;
4use std::ops::RangeInclusive;
5use std::path::{Path, PathBuf};
6use tokio_stream::StreamExt;
7
/// A contiguous slice of a dataset file, identified by the file's path and an
/// inclusive range of line positions.
///
/// The range bounds are used as zero-based positions in the stream of lines
/// read from the file: `0..=9` selects the first ten lines.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct DatasetBatch {
    /// Path of the dataset file this batch reads from.
    file_path: PathBuf,
    /// Inclusive range of zero-based line positions covered by this batch.
    lines: RangeInclusive<usize>,
}
12
13impl DatasetBatch {
14    pub fn new(file_path: impl AsRef<Path>, line_range: RangeInclusive<usize>) -> DatasetBatch {
15        DatasetBatch {
16            file_path: file_path.as_ref().to_path_buf(),
17            lines: line_range,
18        }
19    }
20
21    /// Read lines from this batch
22    pub async fn read_lines(&self) -> Result<impl TryStream<Item = Result<String>>> {
23        let s = DatasetFile::new(&self.file_path)
24            .read_lines()
25            .await?
26            .skip(*self.lines.start())
27            .take(self.get_row_count());
28        Ok(s)
29    }
30
31    #[tokio::main(flavor = "current_thread")]
32    pub async fn read_lines_blocking(self) -> Result<impl TryStream<Item = Result<String>>> {
33        let l = self.read_lines().await?;
34        Ok(l)
35    }
36
37    pub fn get_row_count(&self) -> usize {
38        self.lines.end() - self.lines.start() + 1
39    }
40}