1use std::collections::HashMap;
9use std::io::Cursor;
10use std::path::Path;
11
12use anyhow::{anyhow, bail, Context, Result};
13
14use crate::calamine::{Data as CData, Reader, Xlsx};
15use crate::plan::SourceTable;
16use crate::styles::{self, NumFmtKind, TemplateStyles};
17use crate::value::Value;
18
19#[derive(Debug, Clone)]
20pub struct SourceData {
21 pub name: String,
22 pub headers: Vec<String>,
23 pub rows: Vec<HashMap<String, Value>>,
24}
25
26pub trait SourceReader {
27 fn read(&mut self, sheet: &str, table: &SourceTable) -> Result<SourceData>;
28}
29
30pub struct CalamineSourceReader {
37 workbook: Xlsx<Cursor<Vec<u8>>>,
38 bytes: Vec<u8>,
39 styles: Option<TemplateStyles>,
43}
44
45impl CalamineSourceReader {
46 pub fn open(path: &Path) -> Result<Self> {
47 let bytes = std::fs::read(path)
48 .with_context(|| format!("read data workbook at {}", path.display()))?;
49 Self::open_bytes(bytes)
50 }
51
52 pub fn open_bytes(bytes: Vec<u8>) -> Result<Self> {
55 let cursor = Cursor::new(bytes.clone());
56 let workbook: Xlsx<_> =
57 Xlsx::new(cursor).context("open data workbook from bytes")?;
58 Ok(CalamineSourceReader {
59 workbook,
60 bytes,
61 styles: None,
62 })
63 }
64
65 fn ensure_styles(&mut self) -> &TemplateStyles {
66 if self.styles.is_none() {
67 self.styles =
68 Some(styles::parse_template_styles_bytes(&self.bytes).unwrap_or_default());
69 }
70 self.styles.as_ref().unwrap()
71 }
72
73 pub fn first_sheet(&self) -> Option<String> {
76 self.workbook.sheet_names().into_iter().next()
77 }
78
79 pub fn resolve_sheet_name(&self, pattern: &str) -> Option<String> {
86 if let Some(prefix) = pattern.strip_suffix('*') {
87 self.workbook
88 .sheet_names()
89 .into_iter()
90 .find(|n| n.starts_with(prefix))
91 } else {
92 let pattern = pattern.to_string();
93 self.workbook
94 .sheet_names()
95 .into_iter()
96 .find(|n| n == &pattern)
97 }
98 }
99}
100
101pub fn is_blank_value(v: &Value) -> bool {
111 match v {
112 Value::Empty => true,
113 Value::String(s) => s.chars().all(|c| c.is_whitespace()),
114 _ => false,
115 }
116}
117
118impl SourceReader for CalamineSourceReader {
119 fn read(&mut self, sheet: &str, table: &SourceTable) -> Result<SourceData> {
120 self.ensure_styles();
123 let styles = self.styles.clone().unwrap_or_default();
124 let header_merges = self
125 .workbook
126 .worksheet_merge_cells(sheet)
127 .and_then(|r| r.ok())
128 .unwrap_or_default();
129
130 let range = self
131 .workbook
132 .worksheet_range(sheet)
133 .with_context(|| format!("read source sheet {sheet:?}"))?;
134
135 if range.get_size() == (0, 0) {
136 return Ok(SourceData {
137 name: sheet.to_string(),
138 headers: vec![],
139 rows: vec![],
140 });
141 }
142
143 let (last_row_abs, last_col_abs) = range
150 .end()
151 .map(|(r, c)| (r as usize, c as usize))
152 .unwrap_or((0, 0));
153
154 let (header_row, data_first, data_last_excl, col_first, col_last_excl) = match table {
157 SourceTable::HeaderRow(n) => {
158 let header = n.saturating_sub(1);
159 let row_end_excl = last_row_abs + 1;
160 let col_end_excl = last_col_abs + 1;
161 (header, header + 1, row_end_excl, 0usize, col_end_excl)
162 }
163 SourceTable::Range {
164 first_row,
165 last_row,
166 first_col,
167 last_col,
168 } => {
169 let header = first_row.saturating_sub(1);
170 let data_first = header + 1;
171 let data_last_excl = match last_row {
172 Some(lr) => (*lr).min(last_row_abs + 1),
173 None => last_row_abs + 1,
174 };
175 let col_first0 = first_col.saturating_sub(1);
176 let col_last_excl0 = match last_col {
177 Some(lc) => (*lc).min(last_col_abs + 1),
178 None => last_col_abs + 1,
179 };
180 (header, data_first, data_last_excl, col_first0, col_last_excl0)
181 }
182 };
183
184 if header_row > last_row_abs {
185 return Err(anyhow!(
186 "source sheet {sheet:?} header row {} is past the last used row {}",
187 header_row + 1,
188 last_row_abs + 1
189 ));
190 }
191
192 let slave_to_master: HashMap<(usize, usize), (u32, u32)> = header_merges
195 .iter()
196 .flat_map(|d| {
197 let master = (d.start.0, d.start.1);
198 let mut out: Vec<((usize, usize), (u32, u32))> = Vec::new();
199 for r in d.start.0..=d.end.0 {
200 for c in d.start.1..=d.end.1 {
201 if (r, c) != master {
202 out.push(((r as usize, c as usize), master));
203 }
204 }
205 }
206 out
207 })
208 .collect();
209 let master_end_col: HashMap<(u32, u32), usize> = header_merges
212 .iter()
213 .map(|d| ((d.start.0, d.start.1), d.end.1 as usize))
214 .collect();
215 let cell_at = |r: usize, c: usize| -> Option<&CData> {
216 if let Some(&(mr, mc)) = slave_to_master.get(&(r, c)) {
217 range.get_value((mr, mc))
218 } else {
219 range.get_value((r as u32, c as u32))
220 }
221 };
222
223 let mut headers: Vec<String> = Vec::new();
227 let mut header_cols: Vec<usize> = Vec::new();
228 let mut c = col_first;
229 let mut seen_masters: std::collections::HashSet<(u32, u32)> =
230 std::collections::HashSet::new();
231 while c < col_last_excl {
232 let master_pos = slave_to_master
236 .get(&(header_row, c))
237 .copied()
238 .unwrap_or((header_row as u32, c as u32));
239
240 if !seen_masters.insert(master_pos) {
244 c += 1;
245 continue;
246 }
247
248 let cell = range.get_value(master_pos);
249 match cell {
250 Some(CData::String(s)) if !s.is_empty() => {
251 headers.push(s.clone());
252 header_cols.push(c);
253 if let Some(&end_col) = master_end_col.get(&master_pos) {
256 c = end_col + 1;
257 } else {
258 c += 1;
259 }
260 }
261 None | Some(CData::Empty) => break,
262 Some(other) => {
263 bail!(
264 "source header at column {c} is not a string: {other:?} (xl3 expects text headers)"
265 );
266 }
267 }
268 }
269
270 if headers.is_empty() {
271 return Err(anyhow!("source sheet {sheet:?} has no header row"));
272 }
273
274 let mut data_rows = Vec::with_capacity(data_last_excl.saturating_sub(data_first));
275 for r in data_first..data_last_excl {
276 let mut record = HashMap::with_capacity(headers.len());
277 let mut row_blank = true;
278 for (i, header) in headers.iter().enumerate() {
279 let c = header_cols[i];
280 let raw = cell_at(r, c)
281 .map(Value::from_calamine)
282 .unwrap_or(Value::Empty);
283 let v = match &raw {
288 Value::Number(n) => {
289 let kind = styles
290 .format_code(sheet, r as u32, c as u32)
291 .as_deref()
292 .map(styles::classify_num_fmt)
293 .unwrap_or(NumFmtKind::General);
294 if kind == NumFmtKind::Date {
295 Value::DateNumber(*n)
296 } else {
297 raw
298 }
299 }
300 _ => raw,
301 };
302 if !is_blank_value(&v) {
303 row_blank = false;
304 }
305 record.insert(header.clone(), v);
306 }
307 if row_blank {
308 continue;
309 }
310 data_rows.push(record);
311 }
312
313 Ok(SourceData {
314 name: sheet.to_string(),
315 headers,
316 rows: data_rows,
317 })
318 }
319}