use calamine::{Data, DataRef, Range, Reader, open_workbook_auto};
use std::collections::HashMap;
use std::fmt::Write as FmtWrite;
use std::io::{Cursor, Read, Seek};
use std::path::Path;
use crate::error::{KreuzbergError, Result};
use crate::extraction::capacity;
use crate::types::{ExcelSheet, ExcelWorkbook};
/// Largest bounding box (rows x columns) for which `process_xlsx_sheet_safe`
/// still asks calamine for a dense range. Sheets whose bounding box exceeds
/// this many cells are rendered through the sparse cell listing instead.
const MAX_BOUNDING_BOX_CELLS: u64 = 100_000_000;
#[cfg(feature = "office")]
use crate::extraction::office_metadata::{
extract_core_properties, extract_custom_properties, extract_xlsx_app_properties,
};
#[cfg(feature = "office")]
use serde_json::Value;
pub fn read_excel_file(file_path: &str) -> Result<ExcelWorkbook> {
let lower_path = file_path.to_lowercase();
#[cfg(feature = "office")]
let office_metadata = if lower_path.ends_with(".xlsx")
|| lower_path.ends_with(".xlsm")
|| lower_path.ends_with(".xlam")
|| lower_path.ends_with(".xltm")
{
extract_xlsx_office_metadata_from_file(file_path).ok()
} else {
None
};
#[cfg(not(feature = "office"))]
let office_metadata: Option<HashMap<String, String>> = None;
if lower_path.ends_with(".xlsx") || lower_path.ends_with(".xlsm") || lower_path.ends_with(".xltm") {
let file = std::fs::File::open(file_path)?;
let workbook = calamine::Xlsx::new(std::io::BufReader::new(file))
.map_err(|e| KreuzbergError::parsing(format!("Failed to parse XLSX: {}", e)))?;
return process_xlsx_workbook(workbook, office_metadata);
}
if lower_path.ends_with(".xlam") {
let file = std::fs::File::open(file_path)?;
match calamine::Xlsx::new(std::io::BufReader::new(file)) {
Ok(workbook) => {
return process_xlsx_workbook(workbook, office_metadata);
}
Err(_) => {
return Ok(ExcelWorkbook {
sheets: vec![],
metadata: office_metadata.unwrap_or_default(),
});
}
}
}
if lower_path.ends_with(".xla") {
let file = std::fs::File::open(file_path)?;
match calamine::Xls::new(std::io::BufReader::new(file)) {
Ok(workbook) => {
return process_workbook(workbook, office_metadata);
}
Err(_) => {
return Ok(ExcelWorkbook {
sheets: vec![],
metadata: office_metadata.unwrap_or_default(),
});
}
}
}
if lower_path.ends_with(".xlsb") {
let file = std::fs::File::open(file_path)?;
let workbook = calamine::Xlsb::new(std::io::BufReader::new(file))
.map_err(|e| KreuzbergError::parsing(format!("Failed to parse XLSB: {}", e)))?;
return process_workbook(workbook, office_metadata);
}
let workbook = match open_workbook_auto(Path::new(file_path)) {
Ok(wb) => wb,
Err(calamine::Error::Io(io_err)) => {
if io_err.kind() == std::io::ErrorKind::InvalidData {
return Err(KreuzbergError::parsing(format!(
"Cannot detect Excel file format: {}",
io_err
)));
}
return Err(io_err.into());
}
Err(e) => return Err(KreuzbergError::parsing(format!("Failed to parse Excel file: {}", e))),
};
process_workbook(workbook, office_metadata)
}
/// Parses an in-memory spreadsheet, selecting the reader from `file_extension`.
///
/// `file_extension` is matched case-insensitively and must include the leading
/// dot (`".xlsx"`, `".xls"`, `".ods"`, ...). `.xlam` and `.xla` inputs that
/// cannot be parsed produce an empty workbook instead of an error.
///
/// With the `office` feature enabled, document properties are additionally
/// extracted for `.xlsx`, `.xlsm`, `.xlam` and `.xltm`; a failure to extract
/// them is ignored.
///
/// # Errors
///
/// Returns a parsing error when the reader rejects the data or when the
/// extension is not one of the supported ones.
pub fn read_excel_bytes(data: &[u8], file_extension: &str) -> Result<ExcelWorkbook> {
    let extension = file_extension.to_lowercase();

    #[cfg(feature = "office")]
    let office_metadata = if matches!(extension.as_str(), ".xlsx" | ".xlsm" | ".xlam" | ".xltm") {
        extract_xlsx_office_metadata_from_bytes(data).ok()
    } else {
        None
    };
    #[cfg(not(feature = "office"))]
    let office_metadata: Option<HashMap<String, String>> = None;

    match extension.as_str() {
        ".xlsx" | ".xlsm" | ".xltm" => calamine::Xlsx::new(Cursor::new(data))
            .map_err(|e| KreuzbergError::parsing(format!("Failed to parse XLSX: {}", e)))
            .and_then(|workbook| process_xlsx_workbook(workbook, office_metadata)),
        // A parse failure here yields an empty workbook rather than an error.
        ".xlam" => match calamine::Xlsx::new(Cursor::new(data)) {
            Ok(workbook) => process_xlsx_workbook(workbook, office_metadata),
            Err(_) => Ok(ExcelWorkbook {
                sheets: Vec::new(),
                metadata: office_metadata.unwrap_or_default(),
            }),
        },
        ".xls" => calamine::Xls::new(Cursor::new(data))
            .map_err(|e| KreuzbergError::parsing(format!("Failed to parse XLS: {}", e)))
            .and_then(|workbook| process_workbook(workbook, office_metadata)),
        // Same fallback as `.xlam`, but through the legacy XLS reader.
        ".xla" => match calamine::Xls::new(Cursor::new(data)) {
            Ok(workbook) => process_workbook(workbook, office_metadata),
            Err(_) => Ok(ExcelWorkbook {
                sheets: Vec::new(),
                metadata: office_metadata.unwrap_or_default(),
            }),
        },
        ".xlsb" => calamine::Xlsb::new(Cursor::new(data))
            .map_err(|e| KreuzbergError::parsing(format!("Failed to parse XLSB: {}", e)))
            .and_then(|workbook| process_workbook(workbook, office_metadata)),
        ".ods" => calamine::Ods::new(Cursor::new(data))
            .map_err(|e| KreuzbergError::parsing(format!("Failed to parse ODS: {}", e)))
            .and_then(|workbook| process_workbook(workbook, office_metadata)),
        // The message reports the extension exactly as the caller supplied it.
        _ => Err(KreuzbergError::parsing(format!(
            "Unsupported file extension: {}",
            file_extension
        ))),
    }
}
fn process_xlsx_workbook<RS: Read + Seek>(
mut workbook: calamine::Xlsx<RS>,
office_metadata: Option<HashMap<String, String>>,
) -> Result<ExcelWorkbook> {
let sheet_names = workbook.sheet_names();
let mut sheets = Vec::with_capacity(sheet_names.len());
for name in &sheet_names {
match process_xlsx_sheet_safe(&mut workbook, name) {
Ok(sheet) => sheets.push(sheet),
Err(e) => {
tracing::warn!("Failed to process sheet '{}': {}", name, e);
}
}
}
let metadata = extract_metadata(&workbook, &sheet_names, office_metadata);
Ok(ExcelWorkbook { sheets, metadata })
}
/// Converts one XLSX sheet while guarding against pathologically large
/// bounding boxes.
///
/// The sheet is first streamed cell by cell to learn its real extent. If the
/// bounding box (rows x columns) exceeds `MAX_BOUNDING_BOX_CELLS`, the
/// collected cells are rendered by `process_sparse_sheet_from_cells`.
/// Otherwise the sheet is loaded as a dense range and rendered by
/// `process_sheet`.
///
/// # Errors
///
/// Returns a parsing error when the cell reader cannot be created, when a
/// cell cannot be read, or when the dense range cannot be loaded. A read error
/// in the middle of the stream is reported instead of being treated as the
/// end of the sheet, so a corrupt sheet is never presented as empty or
/// silently truncated.
fn process_xlsx_sheet_safe<RS: Read + Seek>(workbook: &mut calamine::Xlsx<RS>, sheet_name: &str) -> Result<ExcelSheet> {
    // The reader mutably borrows `workbook`; keep it in its own scope so the
    // workbook can be used again for the dense path below.
    let (cells, row_min, row_max, col_min, col_max) = {
        let mut cell_reader = workbook
            .worksheet_cells_reader(sheet_name)
            .map_err(|e| KreuzbergError::parsing(format!("Failed to read sheet '{}': {}", sheet_name, e)))?;

        let mut cells: Vec<((u32, u32), Data)> = Vec::new();
        let mut row_min = u32::MAX;
        let mut row_max = 0u32;
        let mut col_min = u32::MAX;
        let mut col_max = 0u32;

        while let Some(cell) = cell_reader.next_cell().map_err(|e| {
            KreuzbergError::parsing(format!("Failed to read cells of sheet '{}': {}", sheet_name, e))
        })? {
            let (row, col) = cell.get_position();
            row_min = row_min.min(row);
            row_max = row_max.max(row);
            col_min = col_min.min(col);
            col_max = col_max.max(col);

            // Detach the value from the reader's buffer.
            let data: Data = match cell.get_value() {
                DataRef::Empty => Data::Empty,
                DataRef::String(s) => Data::String(s.clone()),
                DataRef::SharedString(s) => Data::String(s.to_string()),
                DataRef::Float(f) => Data::Float(*f),
                DataRef::Int(i) => Data::Int(*i),
                DataRef::Bool(b) => Data::Bool(*b),
                DataRef::DateTime(dt) => Data::DateTime(*dt),
                DataRef::DateTimeIso(s) => Data::DateTimeIso(s.clone()),
                DataRef::DurationIso(s) => Data::DurationIso(s.clone()),
                DataRef::Error(e) => Data::Error(e.clone()),
            };
            cells.push(((row, col), data));
        }

        (cells, row_min, row_max, col_min, col_max)
    };

    if cells.is_empty() {
        return Ok(ExcelSheet {
            name: sheet_name.to_owned(),
            markdown: format!("## {}\n\n*Empty sheet*", sheet_name),
            row_count: 0,
            col_count: 0,
            cell_count: 0,
            table_cells: None,
        });
    }

    // Widen before adding one so the extent cannot overflow `u32`.
    let bb_rows = u64::from(row_max - row_min) + 1;
    let bb_cols = u64::from(col_max - col_min) + 1;
    if bb_rows.saturating_mul(bb_cols) > MAX_BOUNDING_BOX_CELLS {
        return process_sparse_sheet_from_cells(sheet_name, cells, row_min, row_max, col_min, col_max);
    }

    let range = workbook
        .worksheet_range(sheet_name)
        .map_err(|e| KreuzbergError::parsing(format!("Failed to parse sheet '{}': {}", sheet_name, e)))?;
    Ok(process_sheet(sheet_name, &range))
}
/// Renders a sheet whose bounding box is too large for a dense table as a
/// bullet list of its populated cells.
///
/// Cells are listed in row-major order as `- **<A1 reference>**: <value>`;
/// cells whose formatted value is empty are skipped. At most 1000 cells are
/// listed. If any further non-empty cell exists, a trailing
/// `... (N more cells not shown)` line is appended, where `N` is the total
/// number of collected cells minus the number listed.
///
/// `row_min`/`row_max`/`col_min`/`col_max` are the inclusive bounds of the
/// collected cells and must satisfy `min <= max`.
///
/// The returned sheet reports the bounding-box dimensions as `row_count` /
/// `col_count`, the number of collected cells as `cell_count`, and carries no
/// `table_cells`.
fn process_sparse_sheet_from_cells(
    sheet_name: &str,
    mut cells: Vec<((u32, u32), Data)>,
    row_min: u32,
    row_max: u32,
    col_min: u32,
    col_max: u32,
) -> Result<ExcelSheet> {
    const MAX_OUTPUT_CELLS: usize = 1000;

    let cell_count = cells.len();
    // Add one after widening so the extent cannot overflow `u32`.
    let bb_rows = (row_max - row_min) as usize + 1;
    let bb_cols = (col_max - col_min) as usize + 1;

    // Only up to MAX_OUTPUT_CELLS lines are ever written, so size the buffer
    // for that rather than for every collected cell.
    let mut markdown = String::with_capacity(500 + cell_count.min(MAX_OUTPUT_CELLS) * 50);
    write!(
        markdown,
        "## {}\n\n*Note: Sheet contains sparse data spanning {} rows x {} columns ({} actual cells). \
         Bounding box too large for dense extraction. Showing actual cell data below.*\n\n",
        sheet_name, bb_rows, bb_cols, cell_count
    )
    .expect("write to String cannot fail");

    // Stable sort: cells sharing a position keep their original relative order.
    cells.sort_by_key(|(position, _)| *position);

    let mut output_count = 0usize;
    let mut truncated = false;
    for ((row, col), data) in &cells {
        let cell_str = format_cell_to_string(data);
        if cell_str.is_empty() {
            continue;
        }
        if output_count >= MAX_OUTPUT_CELLS {
            // At least one more printable cell exists beyond the cap.
            truncated = true;
            break;
        }
        writeln!(
            markdown,
            "- **{}{}**: {}",
            col_to_excel_letter(*col),
            u64::from(*row) + 1,
            cell_str
        )
        .expect("write to String cannot fail");
        output_count += 1;
    }

    if truncated {
        write!(markdown, "\n... ({} more cells not shown)\n", cell_count - output_count)
            .expect("write to String cannot fail");
    }

    Ok(ExcelSheet {
        name: sheet_name.to_owned(),
        markdown,
        row_count: bb_rows,
        col_count: bb_cols,
        cell_count,
        table_cells: None,
    })
}
/// Converts a zero-based column index into its spreadsheet letter name
/// (`0 -> "A"`, `25 -> "Z"`, `26 -> "AA"`, ...).
///
/// The arithmetic is done in `u64`, so every `u32` index, including
/// `u32::MAX`, is converted without overflow.
fn col_to_excel_letter(col: u32) -> String {
    // Bijective base-26: work on the one-based index, least significant
    // letter first.
    let mut n = u64::from(col) + 1;
    let mut letters: Vec<u8> = Vec::with_capacity(7);
    while n > 0 {
        n -= 1;
        letters.push(b'A' + (n % 26) as u8);
        n /= 26;
    }
    letters.iter().rev().map(|&byte| char::from(byte)).collect()
}
fn process_workbook<RS, R>(mut workbook: R, office_metadata: Option<HashMap<String, String>>) -> Result<ExcelWorkbook>
where
RS: std::io::Read + std::io::Seek,
R: Reader<RS>,
{
let sheet_names = workbook.sheet_names();
let mut sheets = Vec::with_capacity(sheet_names.len());
for name in &sheet_names {
if let Ok(range) = workbook.worksheet_range(name) {
sheets.push(process_sheet(name, &range));
}
}
let metadata = extract_metadata(&workbook, &sheet_names, office_metadata);
Ok(ExcelWorkbook { sheets, metadata })
}
/// Builds an [`ExcelSheet`] from a dense cell range.
///
/// A range with zero rows or zero columns yields an "Empty sheet" placeholder
/// and no `table_cells`; otherwise the markdown table and the cell grid come
/// from `generate_markdown_and_cells`. `cell_count` is the number of used
/// cells reported by the range.
#[inline]
fn process_sheet(name: &str, range: &Range<Data>) -> ExcelSheet {
    let (row_count, col_count) = range.get_size();
    let cell_count = range.used_cells().count();

    let (markdown, table_cells) = if row_count == 0 || col_count == 0 {
        (format!("## {}\n\n*Empty sheet*", name), None)
    } else {
        // Rough size hint for the markdown buffer.
        let capacity_hint = 50 + (col_count * 20) + (cell_count * 12);
        let (markdown, cells) = generate_markdown_and_cells(name, range, capacity_hint);
        (markdown, Some(cells))
    };

    ExcelSheet {
        name: name.to_owned(),
        markdown,
        row_count,
        col_count,
        cell_count,
        table_cells,
    }
}
fn generate_markdown_and_cells(sheet_name: &str, range: &Range<Data>, capacity: usize) -> (String, Vec<Vec<String>>) {
const MAX_REASONABLE_ROWS: usize = 100_000;
let (declared_rows, _declared_cols) = range.get_size();
if declared_rows > MAX_REASONABLE_ROWS {
let actual_cell_count = range.used_cells().count();
if actual_cell_count < 10_000 {
let result_capacity = 100 + sheet_name.len();
let mut result = String::with_capacity(result_capacity);
write!(
result,
"## {}\n\n*Sheet has extreme declared dimensions ({} rows) with minimal actual data ({} cells). Skipping to prevent OOM.*",
sheet_name, declared_rows, actual_cell_count
).unwrap();
return (result, Vec::new());
}
}
let rows: Vec<_> = range.rows().collect();
if rows.is_empty() {
let result_capacity = 50 + sheet_name.len();
let mut result = String::with_capacity(result_capacity);
write!(result, "## {}\n\n*No data*", sheet_name).unwrap();
return (result, Vec::new());
}
let header = &rows[0];
let header_len = header.len();
let row_count = rows.len();
let table_capacity = capacity::estimate_table_markdown_capacity(row_count, header_len);
let mut exact_size = 16 + sheet_name.len();
exact_size += 2 + (header_len * 2);
exact_size += header_len * 10;
exact_size += 5 + (header_len * 5);
exact_size += (row_count - 1) * (5 + header_len * 15);
let mut markdown = String::with_capacity(exact_size.max(table_capacity).max(capacity));
let mut cells: Vec<Vec<String>> = Vec::with_capacity(row_count);
write!(markdown, "## {}\n\n", sheet_name).unwrap();
let mut header_cells = Vec::with_capacity(header_len);
markdown.push_str("| ");
for (i, cell) in header.iter().enumerate() {
if i > 0 {
markdown.push_str(" | ");
}
let cell_str = format_cell_to_string(cell);
if cell_str.contains('|') || cell_str.contains('\\') {
escape_markdown_into(&mut markdown, &cell_str);
} else {
markdown.push_str(&cell_str);
}
header_cells.push(cell_str);
}
markdown.push_str(" |\n");
cells.push(header_cells);
markdown.push_str("| ");
for i in 0..header_len {
if i > 0 {
markdown.push_str(" | ");
}
markdown.push_str("---");
}
markdown.push_str(" |\n");
for row in rows.iter().skip(1) {
let mut row_cells = Vec::with_capacity(header_len);
markdown.push_str("| ");
for i in 0..header_len {
if i > 0 {
markdown.push_str(" | ");
}
let cell_str = if let Some(cell) = row.get(i) {
let cell_str = format_cell_to_string(cell);
if cell_str.contains('|') || cell_str.contains('\\') {
escape_markdown_into(&mut markdown, &cell_str);
} else {
markdown.push_str(&cell_str);
}
cell_str
} else {
String::new()
};
row_cells.push(cell_str);
}
markdown.push_str(" |\n");
cells.push(row_cells);
}
(markdown, cells)
}
#[inline]
fn format_cell_to_string(data: &Data) -> String {
match data {
Data::Empty => String::new(),
Data::String(s) => s.clone(),
Data::Float(f) => {
if f.fract() == 0.0 {
format!("{:.1}", f)
} else {
format!("{}", f)
}
}
Data::Int(i) => format!("{}", i),
Data::Bool(b) => {
if *b {
"true".to_string()
} else {
"false".to_string()
}
}
Data::DateTime(dt) => {
if let Some(datetime) = dt.as_datetime() {
format!("{}", datetime.format("%Y-%m-%d %H:%M:%S"))
} else {
format!("{:?}", dt)
}
}
Data::Error(e) => format!("#ERR: {:?}", e),
Data::DateTimeIso(s) => s.clone(),
Data::DurationIso(s) => format!("DURATION: {}", s),
}
}
/// Appends `s` to `buffer`, prefixing every `|` and `\` with a backslash so
/// the text can sit inside a markdown table cell.
#[inline]
fn escape_markdown_into(buffer: &mut String, s: &str) {
    let mut rest = s;
    // Copy the plain run before each special character, then the escaped
    // character itself. Both specials are single-byte ASCII, so slicing one
    // byte past the match position stays on a character boundary.
    while let Some(pos) = rest.find(|c: char| c == '|' || c == '\\') {
        let (plain, tail) = rest.split_at(pos);
        buffer.push_str(plain);
        buffer.push('\\');
        buffer.push_str(&tail[..1]);
        rest = &tail[1..];
    }
    buffer.push_str(rest);
}
/// Builds the workbook-level metadata map.
///
/// Always contains `sheet_count` and `sheet_names`. When there are more than
/// five sheets, only the first five names are listed, followed by
/// `, ... (<n> total)`. Entries from `office_metadata`, when present, are
/// merged in afterwards and overwrite entries with the same key.
fn extract_metadata<RS, R>(
    workbook: &R,
    sheet_names: &[String],
    office_metadata: Option<HashMap<String, String>>,
) -> HashMap<String, String>
where
    RS: std::io::Read + std::io::Seek,
    R: Reader<RS>,
{
    let sheet_count = sheet_names.len();
    let listed_names = if sheet_count > 5 {
        format!("{}, ... ({} total)", sheet_names[..5].join(", "), sheet_count)
    } else {
        sheet_names.join(", ")
    };

    let mut metadata = HashMap::with_capacity(4);
    metadata.insert("sheet_count".to_owned(), sheet_count.to_string());
    metadata.insert("sheet_names".to_owned(), listed_names);

    // The workbook's own metadata is fetched but currently not used.
    let _ = workbook.metadata();

    if let Some(office_meta) = office_metadata {
        metadata.extend(office_meta);
    }
    metadata
}
/// Flattens a workbook into plain text.
///
/// Sheets are separated by a blank line and rows by a newline. Within a row,
/// the trimmed, non-empty cells are joined with single spaces. Sheets without
/// `table_cells` contribute no text, although their separator is still
/// written.
pub fn excel_to_text(workbook: &ExcelWorkbook) -> String {
    let mut text = String::new();

    for (sheet_idx, sheet) in workbook.sheets.iter().enumerate() {
        if sheet_idx > 0 {
            text.push_str("\n\n");
        }
        let rows = match &sheet.table_cells {
            Some(rows) => rows,
            None => continue,
        };

        for (row_idx, row) in rows.iter().enumerate() {
            if row_idx > 0 {
                text.push('\n');
            }
            let mut needs_space = false;
            for cell in row.iter().map(|cell| cell.trim()).filter(|cell| !cell.is_empty()) {
                if needs_space {
                    text.push(' ');
                }
                text.push_str(cell);
                needs_space = true;
            }
        }
    }

    text
}
/// Concatenates the markdown of every sheet, separated by blank lines.
///
/// Trailing whitespace of each sheet's markdown is trimmed before joining.
pub fn excel_to_markdown(workbook: &ExcelWorkbook) -> String {
    workbook
        .sheets
        .iter()
        .map(|sheet| sheet.markdown.trim_end())
        .collect::<Vec<_>>()
        .join("\n\n")
}
/// Opens the file at `file_path` as a ZIP archive and extracts its Office
/// document properties.
///
/// # Errors
///
/// Returns an I/O-derived error when the file cannot be opened and a parsing
/// error when it is not a readable ZIP archive.
#[cfg(feature = "office")]
fn extract_xlsx_office_metadata_from_file(file_path: &str) -> Result<HashMap<String, String>> {
    let file = std::fs::File::open(file_path)?;
    match zip::ZipArchive::new(file) {
        Ok(mut archive) => extract_xlsx_office_metadata_from_archive(&mut archive),
        Err(e) => Err(KreuzbergError::parsing(format!("Failed to open ZIP archive: {}", e))),
    }
}
/// Treats `data` as an in-memory ZIP archive and extracts its Office document
/// properties.
///
/// # Errors
///
/// Returns a parsing error when `data` is not a readable ZIP archive.
#[cfg(feature = "office")]
fn extract_xlsx_office_metadata_from_bytes(data: &[u8]) -> Result<HashMap<String, String>> {
    match zip::ZipArchive::new(Cursor::new(data)) {
        Ok(mut archive) => extract_xlsx_office_metadata_from_archive(&mut archive),
        Err(e) => Err(KreuzbergError::parsing(format!("Failed to open ZIP archive: {}", e))),
    }
}
/// Collects core, application and custom document properties from an opened
/// archive into a flat string map.
///
/// Each of the three property sources is optional: a source that fails to
/// load is skipped. The creator is stored under both `creator` and
/// `created_by`, the company under `organization`, and custom properties
/// under `custom_<name>`. Custom string values are stored as-is; other JSON
/// values are stored in their textual form.
#[cfg(feature = "office")]
fn extract_xlsx_office_metadata_from_archive<R: std::io::Read + std::io::Seek>(
    archive: &mut zip::ZipArchive<R>,
) -> Result<HashMap<String, String>> {
    // Stores `value` under `key` when it is present.
    fn put(map: &mut HashMap<String, String>, key: &str, value: Option<String>) {
        if let Some(value) = value {
            map.insert(key.to_string(), value);
        }
    }

    let mut metadata = HashMap::new();

    if let Ok(core) = extract_core_properties(archive) {
        put(&mut metadata, "title", core.title);
        if let Some(creator) = core.creator {
            metadata.insert("creator".to_string(), creator.clone());
            metadata.insert("created_by".to_string(), creator);
        }
        put(&mut metadata, "subject", core.subject);
        put(&mut metadata, "keywords", core.keywords);
        put(&mut metadata, "description", core.description);
        put(&mut metadata, "modified_by", core.last_modified_by);
        put(&mut metadata, "created_at", core.created);
        put(&mut metadata, "modified_at", core.modified);
        put(&mut metadata, "revision", core.revision);
        put(&mut metadata, "category", core.category);
        put(&mut metadata, "content_status", core.content_status);
        put(&mut metadata, "language", core.language);
    }

    if let Ok(app) = extract_xlsx_app_properties(archive) {
        if !app.worksheet_names.is_empty() {
            metadata.insert("worksheet_names".to_string(), app.worksheet_names.join(", "));
        }
        put(&mut metadata, "organization", app.company);
        put(&mut metadata, "application", app.application);
        put(&mut metadata, "application_version", app.app_version);
    }

    if let Ok(custom) = extract_custom_properties(archive) {
        for (key, value) in custom {
            let text = match value {
                Value::String(s) => s,
                Value::Null => "null".to_string(),
                Value::Bool(b) => b.to_string(),
                Value::Number(n) => n.to_string(),
                Value::Array(_) | Value::Object(_) => value.to_string(),
            };
            metadata.insert(format!("custom_{}", key), text);
        }
    }

    Ok(metadata)
}
// Unit tests for the private formatting and sheet-processing helpers.
// Ranges are built in memory with calamine's `Range`, so no fixture files
// are read here.
#[cfg(test)]
mod tests {
use super::*;
// --- format_cell_to_string: basic scalar values ---
#[test]
fn test_format_cell_to_string_basic() {
assert_eq!(format_cell_to_string(&Data::Empty), "");
assert_eq!(format_cell_to_string(&Data::String("test".to_owned())), "test");
assert_eq!(format_cell_to_string(&Data::Float(42.0)), "42.0");
assert_eq!(format_cell_to_string(&Data::Int(100)), "100");
assert_eq!(format_cell_to_string(&Data::Bool(true)), "true");
}
// --- escape_markdown_into: plain text, pipes, backslashes ---
#[test]
fn test_escape_markdown_into() {
let mut buffer = String::with_capacity(50);
escape_markdown_into(&mut buffer, "normal text");
assert_eq!(buffer, "normal text");
buffer.clear();
escape_markdown_into(&mut buffer, "text|with|pipes");
assert_eq!(buffer, "text\\|with\\|pipes");
buffer.clear();
escape_markdown_into(&mut buffer, "back\\slash");
assert_eq!(buffer, "back\\\\slash");
}
// Only checks `String::with_capacity`; it does not exercise this module's code.
#[test]
fn test_capacity_optimization() {
let buffer = String::with_capacity(100);
assert!(buffer.capacity() >= 100);
}
// Date-time cells must format to some non-empty text.
#[test]
fn test_format_cell_value_datetime() {
use calamine::{ExcelDateTime, ExcelDateTimeType};
let dt = Data::DateTime(ExcelDateTime::new(49353.5, ExcelDateTimeType::DateTime, false));
let result = format_cell_to_string(&dt);
assert!(!result.is_empty());
}
// Error cells carry the `#ERR` marker.
#[test]
fn test_format_cell_value_error() {
use calamine::CellErrorType;
let result = format_cell_to_string(&Data::Error(CellErrorType::Div0));
assert!(result.contains("#ERR"));
}
// ISO date-time strings pass through unchanged.
#[test]
fn test_format_cell_value_datetime_iso() {
let result = format_cell_to_string(&Data::DateTimeIso("2024-01-01T10:30:00".to_owned()));
assert_eq!(result, "2024-01-01T10:30:00");
}
// ISO durations get the `DURATION: ` prefix.
#[test]
fn test_format_cell_value_duration_iso() {
let result = format_cell_to_string(&Data::DurationIso("PT1H30M".to_owned()));
assert_eq!(result, "DURATION: PT1H30M");
}
// Pipes and backslashes in the same input are both escaped.
#[test]
fn test_escape_markdown_combined() {
let mut buffer = String::new();
escape_markdown_into(&mut buffer, "text|with|pipes\\and\\slashes");
assert_eq!(buffer, "text\\|with\\|pipes\\\\and\\\\slashes");
}
// Input without special characters is copied verbatim.
#[test]
fn test_escape_markdown_no_special_chars() {
let mut buffer = String::new();
escape_markdown_into(&mut buffer, "plain text");
assert_eq!(buffer, "plain text");
}
// --- process_sheet ---
// An empty range yields zero counts and the "Empty sheet" placeholder.
#[test]
fn test_process_sheet_empty() {
let range: Range<Data> = Range::empty();
let sheet = process_sheet("EmptySheet", &range);
assert_eq!(sheet.name, "EmptySheet");
assert_eq!(sheet.row_count, 0);
assert_eq!(sheet.col_count, 0);
assert_eq!(sheet.cell_count, 0);
assert!(sheet.markdown.contains("Empty sheet"));
}
// A 1x1 range is reported as one row, one column, one cell.
#[test]
fn test_process_sheet_single_cell() {
let mut range: Range<Data> = Range::new((0, 0), (0, 0));
range.set_value((0, 0), Data::String("Single Cell".to_owned()));
let sheet = process_sheet("Sheet1", &range);
assert_eq!(sheet.name, "Sheet1");
assert_eq!(sheet.row_count, 1);
assert_eq!(sheet.col_count, 1);
assert_eq!(sheet.cell_count, 1);
assert!(sheet.markdown.contains("Single Cell"));
}
// A 3x2 table: dimensions are reported and values appear in the markdown.
#[test]
fn test_process_sheet_with_data() {
let mut range: Range<Data> = Range::new((0, 0), (2, 1));
range.set_value((0, 0), Data::String("Name".to_owned()));
range.set_value((0, 1), Data::String("Age".to_owned()));
range.set_value((1, 0), Data::String("Alice".to_owned()));
range.set_value((1, 1), Data::Int(30));
range.set_value((2, 0), Data::String("Bob".to_owned()));
range.set_value((2, 1), Data::Int(25));
let sheet = process_sheet("People", &range);
assert_eq!(sheet.name, "People");
assert_eq!(sheet.row_count, 3);
assert_eq!(sheet.col_count, 2);
assert!(sheet.markdown.contains("Name"));
assert!(sheet.markdown.contains("Age"));
assert!(sheet.markdown.contains("Alice"));
assert!(sheet.markdown.contains("30"));
}
// --- generate_markdown_and_cells ---
// An empty range still produces the heading but no cell rows.
#[test]
fn test_generate_markdown_and_cells_empty() {
let range: Range<Data> = Range::empty();
let (markdown, cells) = generate_markdown_and_cells("Test", &range, 100);
assert!(markdown.contains("## Test"));
assert!(cells.is_empty());
}
// Header plus one data row: separator is present and two rows are returned.
#[test]
fn test_generate_markdown_and_cells_with_data() {
let mut range: Range<Data> = Range::new((0, 0), (1, 2));
range.set_value((0, 0), Data::String("Col1".to_owned()));
range.set_value((0, 1), Data::String("Col2".to_owned()));
range.set_value((0, 2), Data::String("Col3".to_owned()));
range.set_value((1, 0), Data::String("A".to_owned()));
range.set_value((1, 1), Data::String("B".to_owned()));
range.set_value((1, 2), Data::String("C".to_owned()));
let (markdown, cells) = generate_markdown_and_cells("Sheet1", &range, 200);
assert!(markdown.contains("## Sheet1"));
assert!(markdown.contains("Col1"));
assert!(markdown.contains("---"));
assert_eq!(cells.len(), 2);
}
// Unset cells inside the range do not drop rows: all three rows are returned.
#[test]
fn test_generate_markdown_and_cells_sparse() {
let mut range: Range<Data> = Range::new((0, 0), (2, 2));
range.set_value((0, 0), Data::String("A".to_owned()));
range.set_value((0, 1), Data::String("B".to_owned()));
range.set_value((0, 2), Data::String("C".to_owned()));
range.set_value((1, 0), Data::String("X".to_owned()));
range.set_value((1, 2), Data::String("Z".to_owned()));
let (markdown, cells) = generate_markdown_and_cells("Sparse", &range, 200);
assert!(markdown.contains("X"));
assert!(markdown.contains("Z"));
assert_eq!(cells.len(), 3);
}
// --- format_cell_to_string: floats and booleans ---
// Whole-number floats keep one decimal place.
#[test]
fn test_format_cell_value_float_integer() {
let result = format_cell_to_string(&Data::Float(100.0));
assert_eq!(result, "100.0");
}
// Fractional floats use default formatting.
#[test]
fn test_format_cell_value_float_decimal() {
let result = format_cell_to_string(&Data::Float(12.3456));
assert_eq!(result, "12.3456");
}
#[test]
fn test_format_cell_value_bool_false() {
let result = format_cell_to_string(&Data::Bool(false));
assert_eq!(result, "false");
}
// --- escape_markdown_into: single-character-class cases ---
#[test]
fn test_format_cell_escape_pipe() {
let mut buffer = String::new();
escape_markdown_into(&mut buffer, "value|with|pipes");
assert_eq!(buffer, "value\\|with\\|pipes");
}
#[test]
fn test_format_cell_escape_backslash() {
let mut buffer = String::new();
escape_markdown_into(&mut buffer, "path\\to\\file");
assert_eq!(buffer, "path\\\\to\\\\file");
}
// Line layout of the generated markdown: heading, blank line, header row,
// separator row, then data rows.
#[test]
fn test_markdown_table_structure() {
let mut range: Range<Data> = Range::new((0, 0), (2, 1));
range.set_value((0, 0), Data::String("H1".to_owned()));
range.set_value((0, 1), Data::String("H2".to_owned()));
range.set_value((1, 0), Data::String("A".to_owned()));
range.set_value((1, 1), Data::String("B".to_owned()));
let (markdown, _cells) = generate_markdown_and_cells("Test", &range, 100);
let lines: Vec<&str> = markdown.lines().collect();
assert!(lines[0].contains("## Test"));
assert!(lines[2].starts_with("| "));
assert!(lines[3].contains("---"));
assert!(lines[4].starts_with("| "));
}
// A fully populated 10x5 range reports 10 rows, 5 columns and 50 cells.
#[test]
fn test_process_sheet_metadata() {
let mut range: Range<Data> = Range::new((0, 0), (9, 4));
for row in 0..10 {
for col in 0..5 {
range.set_value((row, col), Data::String(format!("R{}C{}", row, col)));
}
}
let sheet = process_sheet("Data", &range);
assert_eq!(sheet.row_count, 10);
assert_eq!(sheet.col_count, 5);
assert_eq!(sheet.cell_count, 50);
}
}