use crate::{Document, Error, Extractor, Result};
use calamine::{open_workbook_auto, Reader};
use std::fmt::Write as _;
use std::path::Path;
/// Spreadsheet extractor backed by the `calamine` crate.
///
/// The type is a stateless unit struct; all of the work happens in its
/// [`Extractor`] implementation. It derives the common cheap traits so it can
/// be logged, copied and stored inside other `Debug`/`Clone` types freely.
#[derive(Debug, Default, Clone, Copy)]
pub struct CalamineExtractor;
impl CalamineExtractor {
#[must_use]
pub fn new() -> Self {
Self
}
}
/// Renders every worksheet of a spreadsheet workbook as a Markdown section.
impl Extractor for CalamineExtractor {
    /// File extensions (without the leading dot) handled by this backend.
    fn extensions(&self) -> &[&'static str] {
        &["xlsx", "xls", "xlsb", "xlsm", "ods"]
    }

    /// Short identifier of this backend.
    fn name(&self) -> &'static str {
        "calamine"
    }

    /// Opens the workbook at `path` and converts it to Markdown.
    ///
    /// Each sheet becomes a `## <sheet name>` heading followed by either a
    /// table, the marker `(empty)` for a sheet without cells, or a
    /// `(could not read sheet: …)` note when that single sheet fails to load.
    /// Consecutive sheets are separated by a blank line. The returned
    /// [`Document`] has no title and empty metadata.
    ///
    /// # Errors
    ///
    /// Returns [`Error::ParseError`] when the workbook itself cannot be
    /// opened. Failures on individual sheets are reported inline in the
    /// Markdown instead of aborting the whole extraction.
    fn extract(&self, path: &Path) -> Result<Document> {
        let mut workbook = open_workbook_auto(path)
            .map_err(|e| Error::ParseError(format!("calamine open failed: {e}")))?;

        // `sheet_names()` already returns an owned list, so no extra clone is
        // needed to keep iterating while `worksheet_range` borrows the
        // workbook mutably.
        let sheet_names: Vec<String> = workbook.sheet_names();

        let mut markdown = String::new();
        for (sheet_idx, sheet_name) in sheet_names.iter().enumerate() {
            // Blank line between sections, but not before the first one.
            if sheet_idx > 0 {
                markdown.push_str("\n\n");
            }
            markdown.push_str("## ");
            markdown.push_str(sheet_name);
            markdown.push_str("\n\n");

            match workbook.worksheet_range(sheet_name) {
                Ok(range) if range.is_empty() => markdown.push_str("(empty)\n"),
                Ok(range) => render_range_as_table(&range, &mut markdown),
                Err(e) => {
                    // Best effort: note the failure and continue with the
                    // remaining sheets. Writing into a `String` cannot fail,
                    // so the `fmt::Result` is deliberately discarded.
                    let _ = writeln!(markdown, "(could not read sheet: {e})");
                }
            }
        }

        Ok(Document {
            markdown,
            title: None,
            metadata: std::collections::HashMap::new(),
        })
    }
}
/// Appends `range` to `out` as a Markdown pipe table.
///
/// The first row of the range is used as the table header and fixes the
/// column count; it is followed by a `---` delimiter row and then one line
/// per remaining row. Rows that have fewer cells than the header are padded
/// with empty cells. Every cell's text is passed through `escape_cell`.
/// Nothing is written when the range yields no rows.
fn render_range_as_table(range: &calamine::Range<calamine::Data>, out: &mut String) {
    let mut lines = range.rows();
    let header = match lines.next() {
        Some(first) => first,
        None => return,
    };
    let width = header.len();

    // Header line.
    out.push('|');
    header.iter().for_each(|cell| {
        out.push(' ');
        out.push_str(&escape_cell(&cell.to_string()));
        out.push_str(" |");
    });
    out.push('\n');

    // Delimiter line: one `---` column per header cell.
    out.push('|');
    out.push_str(&" --- |".repeat(width));
    out.push('\n');

    // Body lines, always exactly `width` cells wide.
    for line in lines {
        out.push('|');
        for idx in 0..width {
            let text = match line.get(idx) {
                Some(cell) => cell.to_string(),
                None => String::new(),
            };
            out.push(' ');
            out.push_str(&escape_cell(&text));
            out.push_str(" |");
        }
        out.push('\n');
    }
}
/// Makes cell text safe to embed in a single Markdown table cell.
///
/// * `|` is emitted as `\|` so it is not read as a column separator.
/// * Line breaks (`\r\n`, `\n`, or a lone `\r`) become a single space,
///   because a table row must stay on one line.
///
/// All other characters are copied through unchanged.
fn escape_cell(s: &str) -> String {
    let mut escaped = String::with_capacity(s.len());
    let mut chars = s.chars().peekable();
    while let Some(ch) = chars.next() {
        match ch {
            '|' => escaped.push_str("\\|"),
            '\r' => {
                // Treat CRLF as one line break so it yields one space, not two.
                if chars.peek() == Some(&'\n') {
                    chars.next();
                }
                escaped.push(' ');
            }
            '\n' => escaped.push(' '),
            other => escaped.push(other),
        }
    }
    escaped
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn extensions_cover_common_spreadsheet_formats() {
        let ext = CalamineExtractor.extensions();
        for required in ["xlsx", "xls", "ods"] {
            assert!(
                ext.contains(&required),
                "expected calamine to handle .{required}, got {ext:?}"
            );
        }
    }

    #[test]
    fn name_identifies_backend() {
        assert_eq!(CalamineExtractor.name(), "calamine");
    }

    #[test]
    fn escape_cell_handles_pipes_and_newlines() {
        // Pipes must come out backslash-escaped, otherwise they split the cell.
        assert_eq!(escape_cell("a|b"), "a\\|b");
        assert_eq!(escape_cell("||"), "\\|\\|");
        // Every flavour of line break collapses to exactly one space.
        assert_eq!(escape_cell("a\nb"), "a b");
        assert_eq!(escape_cell("a\r\nb"), "a b");
        assert_eq!(escape_cell("a\rb"), "a b");
        // Text without special characters is returned unchanged.
        assert_eq!(escape_cell("plain text"), "plain text");
        assert_eq!(escape_cell(""), "");
    }

    #[test]
    fn missing_file_returns_typed_error() {
        let result = CalamineExtractor.extract(std::path::Path::new("/nonexistent-file-here.xlsx"));
        assert!(matches!(result, Err(Error::ParseError(_))));
    }
}