rsv_lib/utils/
reader.rs

1use crate::utils::util::werr_exit;
2use calamine::{open_workbook_auto, Data, Range, Reader, Rows};
3use crossbeam_channel::Sender;
4use std::error::Error;
5use std::io::stdin;
6use std::path::Path;
7use std::{
8    fs::File,
9    io::{BufRead, BufReader, Lines},
10};
11
/// Line-oriented reader over a file on disk.
///
/// Wraps the [`Lines`] iterator of a buffered [`File`]; each item it yields is
/// one line of the file, or the I/O error hit while reading that line.
#[derive(Debug)]
pub struct ChunkReader(Lines<BufReader<File>>);
13
/// One batch of lines produced by `ChunkReader::send_to_channel_by_chunks`.
#[derive(Debug)]
pub struct Task {
    /// The lines that make up this batch, in file order.
    pub lines: Vec<String>,
    /// Sum of the byte lengths of the strings in `lines`.
    pub bytes: usize,
    /// Sequence number of this batch; the first batch sent is number 1.
    pub chunk: usize,
}
19
20impl ChunkReader {
21    pub fn new(path: &Path) -> Result<Self, std::io::Error> {
22        let rdr = BufReader::new(File::open(path)?).lines();
23        Ok(ChunkReader(rdr))
24    }
25
26    pub fn next(&mut self) -> Option<Result<String, std::io::Error>> {
27        self.0.next()
28    }
29
30    pub fn send_to_channel_by_chunks(&mut self, tx: Sender<Task>, line_buffer_n: usize) {
31        let mut lines = Vec::with_capacity(line_buffer_n);
32        let mut n = 0;
33        let mut bytes = 0;
34        let mut chunk = 1;
35
36        for l in self.0.by_ref() {
37            let l = l.unwrap();
38            n += 1;
39            bytes += l.len();
40            lines.push(l);
41            if n >= line_buffer_n {
42                tx.send(Task {
43                    lines,
44                    bytes,
45                    chunk,
46                })
47                .unwrap();
48                n = 0;
49                bytes = 0;
50                lines = Vec::with_capacity(line_buffer_n);
51                chunk += 1;
52            }
53        }
54
55        if !lines.is_empty() {
56            tx.send(Task {
57                lines,
58                bytes,
59                chunk,
60            })
61            .unwrap();
62        }
63
64        drop(tx)
65    }
66}
67
68pub struct ExcelReader {
69    range: Range<Data>,
70    pub next_called: usize,
71}
72
73pub struct ExcelChunkTask {
74    pub lines: Vec<Vec<Data>>,
75    pub n: usize,
76    pub chunk: usize,
77}
78
79impl<'a> ExcelReader {
80    pub fn new(path: &Path, sheet: usize) -> Result<Self, Box<dyn Error>> {
81        let mut workbook = open_workbook_auto(path)?;
82
83        let range = workbook.worksheet_range_at(sheet).unwrap_or_else(|| {
84            werr_exit!("{}-th sheet does not exist.", sheet);
85        })?;
86
87        Ok(ExcelReader {
88            range,
89            next_called: 0,
90        })
91    }
92
93    pub fn len(&self) -> usize {
94        self.range.get_size().0
95    }
96
97    pub fn column_n(&self) -> usize {
98        self.range.get_size().1
99    }
100
101    pub fn next(&mut self) -> Option<&[Data]> {
102        self.next_called += 1;
103        self.range.rows().next()
104    }
105
106    pub fn iter(&'a self) -> Rows<'a, Data> {
107        self.range.rows()
108    }
109
110    pub fn send_to_channel_by_chunk(self, tx: Sender<ExcelChunkTask>, size: Option<usize>) {
111        let line_buffer_n = size.unwrap_or(1000);
112        let mut lines = Vec::with_capacity(line_buffer_n);
113        let mut n = 0;
114        let mut chunk = 1;
115        for l in self.iter().skip(self.next_called) {
116            let l = l.to_owned();
117            n += 1;
118            lines.push(l);
119            if n >= line_buffer_n {
120                tx.send(ExcelChunkTask { lines, n, chunk }).unwrap();
121                n = 0;
122                lines = Vec::with_capacity(line_buffer_n);
123                chunk += 1;
124            }
125        }
126
127        if !lines.is_empty() {
128            tx.send(ExcelChunkTask { lines, n, chunk }).unwrap();
129        }
130
131        drop(tx)
132    }
133}
134
/// Configurable reader that collects lines from standard input.
///
/// The default value reads every line and treats the first one as a header.
#[derive(Debug, Default)]
pub struct IoReader {
    /// `true` when the input has no header line.
    no_header: bool,
    /// Maximum number of data lines to read; `None` reads everything.
    top_n: Option<usize>,
}
139
140impl IoReader {
141    pub fn new() -> Self {
142        IoReader {
143            no_header: false,
144            top_n: None,
145        }
146    }
147
148    pub fn no_header(&mut self, no_header: bool) -> &mut Self {
149        self.no_header = no_header;
150        self
151    }
152
153    pub fn top_n(&mut self, top_n: usize) -> &mut Self {
154        self.top_n = Some(top_n);
155        self
156    }
157
158    pub fn lines(&self) -> Vec<String> {
159        // open file and header
160        let lines = stdin().lock().lines();
161
162        match self.top_n {
163            Some(n) => lines
164                .take(n + 1 - self.no_header as usize)
165                .filter_map(|i| i.ok())
166                .collect(),
167            None => lines.filter_map(|i| i.ok()).collect(),
168        }
169    }
170}