1use crate::utils::util::werr_exit;
2use calamine::{open_workbook_auto, Data, Range, Reader, Rows};
3use crossbeam_channel::Sender;
4use std::error::Error;
5use std::io::stdin;
6use std::path::Path;
7use std::{
8 fs::File,
9 io::{BufRead, BufReader, Lines},
10};
11
/// Buffered, line-oriented reader over a file on disk.
///
/// Wraps the `Lines` iterator of a `BufReader<File>`, so every item is one
/// line of the file with its line terminator removed.
#[derive(Debug)]
pub struct ChunkReader(Lines<BufReader<File>>);
13
/// One batch of text lines handed from a reader to a consumer.
#[derive(Debug)]
pub struct Task {
    /// The lines that make up this batch, in file order.
    pub lines: Vec<String>,
    /// Sum of the byte lengths of `lines`.
    pub bytes: usize,
    /// Sequence number of this batch.
    pub chunk: usize,
}
19
20impl ChunkReader {
21 pub fn new(path: &Path) -> Result<Self, std::io::Error> {
22 let rdr = BufReader::new(File::open(path)?).lines();
23 Ok(ChunkReader(rdr))
24 }
25
26 pub fn next(&mut self) -> Option<Result<String, std::io::Error>> {
27 self.0.next()
28 }
29
30 pub fn send_to_channel_by_chunks(&mut self, tx: Sender<Task>, line_buffer_n: usize) {
31 let mut lines = Vec::with_capacity(line_buffer_n);
32 let mut n = 0;
33 let mut bytes = 0;
34 let mut chunk = 1;
35
36 for l in self.0.by_ref() {
37 let l = l.unwrap();
38 n += 1;
39 bytes += l.len();
40 lines.push(l);
41 if n >= line_buffer_n {
42 tx.send(Task {
43 lines,
44 bytes,
45 chunk,
46 })
47 .unwrap();
48 n = 0;
49 bytes = 0;
50 lines = Vec::with_capacity(line_buffer_n);
51 chunk += 1;
52 }
53 }
54
55 if !lines.is_empty() {
56 tx.send(Task {
57 lines,
58 bytes,
59 chunk,
60 })
61 .unwrap();
62 }
63
64 drop(tx)
65 }
66}
67
68pub struct ExcelReader {
69 range: Range<Data>,
70 pub next_called: usize,
71}
72
73pub struct ExcelChunkTask {
74 pub lines: Vec<Vec<Data>>,
75 pub n: usize,
76 pub chunk: usize,
77}
78
79impl<'a> ExcelReader {
80 pub fn new(path: &Path, sheet: usize) -> Result<Self, Box<dyn Error>> {
81 let mut workbook = open_workbook_auto(path)?;
82
83 let range = workbook.worksheet_range_at(sheet).unwrap_or_else(|| {
84 werr_exit!("{}-th sheet does not exist.", sheet);
85 })?;
86
87 Ok(ExcelReader {
88 range,
89 next_called: 0,
90 })
91 }
92
93 pub fn len(&self) -> usize {
94 self.range.get_size().0
95 }
96
97 pub fn column_n(&self) -> usize {
98 self.range.get_size().1
99 }
100
101 pub fn next(&mut self) -> Option<&[Data]> {
102 self.next_called += 1;
103 self.range.rows().next()
104 }
105
106 pub fn iter(&'a self) -> Rows<'a, Data> {
107 self.range.rows()
108 }
109
110 pub fn send_to_channel_by_chunk(self, tx: Sender<ExcelChunkTask>, size: Option<usize>) {
111 let line_buffer_n = size.unwrap_or(1000);
112 let mut lines = Vec::with_capacity(line_buffer_n);
113 let mut n = 0;
114 let mut chunk = 1;
115 for l in self.iter().skip(self.next_called) {
116 let l = l.to_owned();
117 n += 1;
118 lines.push(l);
119 if n >= line_buffer_n {
120 tx.send(ExcelChunkTask { lines, n, chunk }).unwrap();
121 n = 0;
122 lines = Vec::with_capacity(line_buffer_n);
123 chunk += 1;
124 }
125 }
126
127 if !lines.is_empty() {
128 tx.send(ExcelChunkTask { lines, n, chunk }).unwrap();
129 }
130
131 drop(tx)
132 }
133}
134
/// Reads lines from standard input, optionally limited to the first few.
#[derive(Debug, Default)]
pub struct IoReader {
    /// When `false`, one extra line is read on top of `top_n` (the extra
    /// line accounts for a header).
    no_header: bool,
    /// Upper bound on the number of lines to read; `None` reads everything.
    top_n: Option<usize>,
}

impl IoReader {
    /// Creates a reader with `no_header` set to `false` and no line limit.
    pub fn new() -> Self {
        Self::default()
    }

    /// Sets the `no_header` flag and returns `self` for chaining.
    pub fn no_header(&mut self, no_header: bool) -> &mut Self {
        self.no_header = no_header;
        self
    }

    /// Limits reading to `top_n` lines (plus one more line unless
    /// `no_header` is set) and returns `self` for chaining.
    pub fn top_n(&mut self, top_n: usize) -> &mut Self {
        self.top_n = Some(top_n);
        self
    }

    /// Reads lines from standard input and returns the ones that decoded
    /// successfully.
    ///
    /// With a limit set, at most `top_n` lines are consumed, or `top_n + 1`
    /// when `no_header` is `false`; lines that fail to decode still count
    /// towards that limit. Without a limit, input is read until it ends.
    ///
    /// Lines that are not valid UTF-8 are skipped. Any other read error ends
    /// the read, because such errors tend to repeat on every attempt (for
    /// example when stdin is redirected from a directory) and would otherwise
    /// keep this method spinning forever.
    pub fn lines(&self) -> Vec<String> {
        let lines = stdin().lock().lines();

        match self.top_n {
            Some(n) => {
                // Saturate so a huge `top_n` cannot overflow the addition.
                let limit = if self.no_header {
                    n
                } else {
                    n.saturating_add(1)
                };
                Self::collect_readable(lines.take(limit))
            }
            None => Self::collect_readable(lines),
        }
    }

    /// Gathers the successfully read lines, skipping undecodable ones and
    /// stopping at the first error of any other kind.
    fn collect_readable<I>(lines: I) -> Vec<String>
    where
        I: Iterator<Item = std::io::Result<String>>,
    {
        let mut collected = Vec::new();
        for line in lines {
            match line {
                Ok(text) => collected.push(text),
                // The offending bytes were already consumed; move on.
                Err(e) if e.kind() == std::io::ErrorKind::InvalidData => {}
                Err(_) => break,
            }
        }
        collected
    }
}