1use crate::dataset_file::DatasetFile;
2use crate::errors::Result;
3use futures_core::stream::TryStream;
4use std::ops::RangeInclusive;
5use std::path::{Path, PathBuf};
6use tokio_stream::StreamExt;
7
/// A batch of consecutive lines taken from a single dataset file.
///
/// The batch only stores *where* the lines live (file path plus an inclusive
/// line range); the lines themselves are read on demand.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct DatasetBatch {
    /// Path of the file the batch's lines are read from.
    file_path: PathBuf,
    /// Inclusive range of line positions covered by this batch.
    lines: RangeInclusive<usize>,
}
12
13impl DatasetBatch {
14 pub fn new(file_path: impl AsRef<Path>, line_range: RangeInclusive<usize>) -> DatasetBatch {
15 DatasetBatch {
16 file_path: file_path.as_ref().to_path_buf(),
17 lines: line_range,
18 }
19 }
20
21 pub async fn read_lines(&self) -> Result<impl TryStream<Item = Result<String>>> {
23 let s = DatasetFile::new(&self.file_path)
24 .read_lines()
25 .await?
26 .skip(*self.lines.start())
27 .take(self.get_row_count());
28 Ok(s)
29 }
30
31 #[tokio::main(flavor = "current_thread")]
32 pub async fn read_lines_blocking(self) -> Result<impl TryStream<Item = Result<String>>> {
33 let l = self.read_lines().await?;
34 Ok(l)
35 }
36
37 pub fn get_row_count(&self) -> usize {
38 self.lines.end() - self.lines.start() + 1
39 }
40}