use crate::api::{DateFormat, FormulaMode, MergeStrategy, OutputFormat, SheetSelector};
use crate::error::XlsxToMdError;
use crate::types::CellRange;
use chrono::NaiveDate;
use rayon::prelude::*;
use std::io::{Cursor, Read, Seek, Write};
/// Crate-internal bundle of conversion settings.
///
/// Populated by `ConverterBuilder` and then owned by `Converter`, which passes
/// it by reference to the parser (`parse_sheet`) and the cell formatter
/// (`format_cell`).
#[derive(Debug, Clone)]
pub(crate) struct ConversionConfig {
/// Sheet selection; handed to the parser's `select_sheets` during conversion.
pub sheet_selector: SheetSelector,
/// Merge handling strategy; forwarded to `LogicalGrid::build`.
pub merge_strategy: MergeStrategy,
/// Date rendering choice. A `DateFormat::Custom` string is sanity-checked in
/// `ConverterBuilder::build`.
pub date_format: DateFormat,
/// Formula handling mode. Not read directly in this file; presumably consumed
/// by the parser/formatter via the shared config reference.
pub formula_mode: FormulaMode,
/// Passed to `select_sheets` together with the selector; presumably controls
/// whether hidden sheets are eligible (confirm against the parser).
pub include_hidden: bool,
/// Optional cell range. `ConverterBuilder::build` rejects a range whose start
/// row/column is greater than its end row/column.
pub range: Option<CellRange>,
/// Output flavour; selects the `OutputFormatter` and the per-sheet heading
/// emitted by `Converter::convert`.
pub output_format: OutputFormat,
}
impl Default for ConversionConfig {
    /// Produces the baseline configuration: every sheet selected, merged cells
    /// handled with `DataDuplication`, ISO 8601 dates, cached formula values,
    /// hidden content excluded, no range restriction, Markdown output.
    fn default() -> Self {
        let sheet_selector = SheetSelector::All;
        let merge_strategy = MergeStrategy::DataDuplication;
        let date_format = DateFormat::Iso8601;
        let formula_mode = FormulaMode::CachedValue;
        let output_format = OutputFormat::Markdown;

        Self {
            sheet_selector,
            merge_strategy,
            date_format,
            formula_mode,
            include_hidden: false,
            range: None,
            output_format,
        }
    }
}
/// Fluent builder for `Converter`.
///
/// Starts from `ConversionConfig::default()`, lets callers override individual
/// settings through the chained setter methods, and validates the result in
/// `build`.
#[derive(Debug)]
pub struct ConverterBuilder {
// Settings accumulated so far; moved into the `Converter` by `build`.
config: ConversionConfig,
}
impl Default for ConverterBuilder {
fn default() -> Self {
Self::new()
}
}
impl ConverterBuilder {
    /// Creates a builder initialised with the default `ConversionConfig`.
    pub fn new() -> Self {
        Self {
            config: ConversionConfig::default(),
        }
    }

    /// Replaces the sheet selector.
    pub fn with_sheet_selector(mut self, selector: SheetSelector) -> Self {
        self.config.sheet_selector = selector;
        self
    }

    /// Replaces the merge strategy.
    pub fn with_merge_strategy(mut self, strategy: MergeStrategy) -> Self {
        self.config.merge_strategy = strategy;
        self
    }

    /// Replaces the date format.
    ///
    /// A `DateFormat::Custom` string is not checked here; validation happens in
    /// [`ConverterBuilder::build`].
    pub fn with_date_format(mut self, format: DateFormat) -> Self {
        self.config.date_format = format;
        self
    }

    /// Replaces the formula mode.
    pub fn with_formula_mode(mut self, mode: FormulaMode) -> Self {
        self.config.formula_mode = mode;
        self
    }

    /// Sets the `include_hidden` flag.
    pub fn include_hidden(mut self, include: bool) -> Self {
        self.config.include_hidden = include;
        self
    }

    /// Restricts conversion to the cell range spanned by `start` and `end`.
    ///
    /// Each tuple is `(row, col)` and is forwarded positionally to
    /// `CellCoord::new`. The range is stored as given; ordering of the corners
    /// is only checked by [`ConverterBuilder::build`].
    pub fn with_range(mut self, start: (u32, u32), end: (u32, u32)) -> Self {
        use crate::types::CellCoord;
        self.config.range = Some(CellRange::new(
            CellCoord::new(start.0, start.1),
            CellCoord::new(end.0, end.1),
        ));
        self
    }

    /// Replaces the output format.
    pub fn with_output_format(mut self, format: OutputFormat) -> Self {
        self.config.output_format = format;
        self
    }

    /// Validates the accumulated settings and produces a [`Converter`].
    ///
    /// # Errors
    ///
    /// Returns `XlsxToMdError::Config` when:
    /// - a range was set whose start row is greater than its end row, or whose
    ///   start column is greater than its end column;
    /// - a `DateFormat::Custom` string cannot be rendered for a sample date
    ///   (invalid or unsupported specifier) or renders to an empty string.
    pub fn build(self) -> Result<Converter, XlsxToMdError> {
        if let Some(range) = &self.config.range {
            if range.start.row > range.end.row {
                return Err(XlsxToMdError::Config(format!(
                    "Invalid range: start row ({}) > end row ({})",
                    range.start.row, range.end.row
                )));
            }
            if range.start.col > range.end.col {
                return Err(XlsxToMdError::Config(format!(
                    "Invalid range: start col ({}) > end col ({})",
                    range.start.col, range.end.col
                )));
            }
        }

        if let DateFormat::Custom(ref format_str) = self.config.date_format {
            let test_date = NaiveDate::from_ymd_opt(2025, 1, 1)
                .ok_or_else(|| XlsxToMdError::Config("Failed to create test date".to_string()))?;

            // chrono reports a bad format string as `fmt::Error` from `Display`.
            // `to_string()` would turn that into a panic, so render through
            // `std::fmt::write` and surface the failure as a config error.
            let mut rendered = String::new();
            let outcome = std::fmt::write(
                &mut rendered,
                format_args!("{}", test_date.format(format_str)),
            );
            if outcome.is_err() || rendered.is_empty() {
                return Err(XlsxToMdError::Config(format!(
                    "Invalid date format string: '{}'",
                    format_str
                )));
            }
        }

        Ok(Converter::new(self.config))
    }
}
/// Converts workbook data read from an input stream into the configured
/// output format.
///
/// Instances are created by `ConverterBuilder::build` (or crate-internally via
/// `Converter::new`).
#[derive(Debug)]
pub struct Converter {
// Settings fixed at construction time.
config: ConversionConfig,
// Cell formatter created once in `new` and reused for every cell.
formatter: crate::formatter::CellFormatter,
}
impl Converter {
pub(crate) fn new(config: ConversionConfig) -> Self {
Self {
formatter: crate::formatter::CellFormatter::new(),
config,
}
}
pub fn convert<R: Read + Seek, W: Write>(
&self,
mut input: R,
mut output: W,
) -> Result<(), XlsxToMdError> {
use std::io::{BufWriter, Write};
use crate::security::SecurityConfig;
let security_config = SecurityConfig::default();
let mut buffer = Vec::new();
let bytes_read = input.read_to_end(&mut buffer)?;
if bytes_read as u64 > security_config.max_input_file_size {
return Err(XlsxToMdError::SecurityViolation(format!(
"Input file size exceeds maximum: {} bytes (max: {} bytes)",
bytes_read, security_config.max_input_file_size
)));
}
let parser = crate::parser::WorkbookParser::open_with_metadata(Cursor::new(buffer.clone()))?;
let sheet_names =
parser.select_sheets(&self.config.sheet_selector, self.config.include_hidden)?;
let metadata = parser.metadata()
.ok_or_else(|| XlsxToMdError::Config("Metadata not available".to_string()))?
.clone();
let sheet_outputs: Result<Vec<(usize, String)>, XlsxToMdError> = sheet_names
.par_iter()
.enumerate()
.map(|(sheet_idx, sheet_name)| {
let mut parser = crate::parser::WorkbookParser::open_with_existing_metadata(
Cursor::new(buffer.clone()),
metadata.clone(),
)?;
let (metadata, raw_cells) = parser.parse_sheet(sheet_name, &self.config)?;
let mut formatted_cells = Vec::new();
for raw_cell in &raw_cells {
let content =
self.formatter
.format_cell(raw_cell, &self.config, metadata.is_1904)?;
formatted_cells.push((raw_cell.coord, content));
}
let grid = crate::grid::LogicalGrid::build(
raw_cells,
formatted_cells,
&metadata,
self.config.merge_strategy,
)?;
let formatter = crate::output::OutputFormatter::from_format(self.config.output_format);
let mut output_buffer = Vec::new();
formatter.render(&grid, &mut output_buffer, &metadata.merged_regions)?;
let output_string = String::from_utf8(output_buffer).map_err(|e| {
XlsxToMdError::Io(std::io::Error::new(std::io::ErrorKind::InvalidData, e))
})?;
Ok((sheet_idx, output_string))
})
.collect();
let mut sheet_outputs = sheet_outputs?;
sheet_outputs.sort_by_key(|(idx, _)| *idx);
let mut writer = BufWriter::new(&mut output);
for (sheet_idx, (_, sheet_output)) in sheet_outputs.iter().enumerate() {
if sheet_idx > 0 && self.config.output_format == crate::api::OutputFormat::Markdown {
writeln!(writer, "\n---\n")?;
} else if sheet_idx > 0 {
writeln!(writer)?;
}
if self.config.output_format == crate::api::OutputFormat::Markdown {
writeln!(writer, "# {}\n", sheet_names[sheet_idx])?;
} else if self.config.output_format == crate::api::OutputFormat::Json {
} else {
if self.config.output_format == crate::api::OutputFormat::Csv {
writeln!(writer, "# Sheet: {}\n", sheet_names[sheet_idx])?;
} else if self.config.output_format == crate::api::OutputFormat::Html {
writeln!(writer, "<!-- Sheet: {} -->\n", sheet_names[sheet_idx])?;
}
}
write!(writer, "{}", sheet_output)?;
}
writer.flush()?;
Ok(())
}
pub fn convert_to_string<R: Read + Seek>(&self, input: R) -> Result<String, XlsxToMdError> {
let mut buffer = Vec::new();
self.convert(input, &mut buffer)?;
let result = String::from_utf8(buffer).map_err(|e| {
XlsxToMdError::Io(std::io::Error::new(std::io::ErrorKind::InvalidData, e))
})?;
Ok(result)
}
}
/// Unit tests for `ConverterBuilder` defaults, setters and validation, plus a
/// smoke test for `Converter` on invalid input.
#[cfg(test)]
mod tests {
    use super::*;
    use crate::types::CellCoord;

    /// A fresh builder carries every documented default, including the output
    /// format.
    #[test]
    fn test_converter_builder_new() {
        let builder = ConverterBuilder::new();
        assert_eq!(builder.config.sheet_selector, SheetSelector::All);
        assert_eq!(
            builder.config.merge_strategy,
            MergeStrategy::DataDuplication
        );
        assert_eq!(builder.config.date_format, DateFormat::Iso8601);
        assert_eq!(builder.config.formula_mode, FormulaMode::CachedValue);
        assert!(!builder.config.include_hidden);
        assert!(builder.config.range.is_none());
        assert_eq!(builder.config.output_format, OutputFormat::Markdown);
    }

    /// `Default` and `new` must agree on the starting configuration.
    #[test]
    fn test_converter_builder_default_matches_new() {
        let from_default = ConverterBuilder::default();
        let from_new = ConverterBuilder::new();
        assert_eq!(
            from_default.config.sheet_selector,
            from_new.config.sheet_selector
        );
        assert_eq!(
            from_default.config.output_format,
            from_new.config.output_format
        );
        assert_eq!(
            from_default.config.include_hidden,
            from_new.config.include_hidden
        );
    }

    #[test]
    fn test_with_sheet_selector() {
        let builder = ConverterBuilder::new().with_sheet_selector(SheetSelector::Index(0));
        assert!(matches!(
            builder.config.sheet_selector,
            SheetSelector::Index(0)
        ));

        let builder =
            ConverterBuilder::new().with_sheet_selector(SheetSelector::Name("Sheet1".to_string()));
        assert!(matches!(
            builder.config.sheet_selector,
            SheetSelector::Name(ref name) if name == "Sheet1"
        ));
    }

    #[test]
    fn test_with_merge_strategy() {
        let builder = ConverterBuilder::new().with_merge_strategy(MergeStrategy::HtmlFallback);
        assert_eq!(builder.config.merge_strategy, MergeStrategy::HtmlFallback);
    }

    #[test]
    fn test_with_date_format() {
        let builder = ConverterBuilder::new()
            .with_date_format(DateFormat::Custom("%Y年%m月%d日".to_string()));
        assert!(matches!(
            builder.config.date_format,
            DateFormat::Custom(ref s) if s == "%Y年%m月%d日"
        ));
    }

    #[test]
    fn test_with_formula_mode() {
        let builder = ConverterBuilder::new().with_formula_mode(FormulaMode::Formula);
        assert_eq!(builder.config.formula_mode, FormulaMode::Formula);
    }

    #[test]
    fn test_include_hidden() {
        let builder = ConverterBuilder::new().include_hidden(true);
        assert!(builder.config.include_hidden);
    }

    #[test]
    fn test_with_range() {
        let builder = ConverterBuilder::new().with_range((0, 0), (9, 2));
        assert!(builder.config.range.is_some());
        let range = builder.config.range.unwrap();
        assert_eq!(range.start, CellCoord::new(0, 0));
        assert_eq!(range.end, CellCoord::new(9, 2));
    }

    /// The output-format setter stores the chosen format, like every other
    /// builder setter.
    #[test]
    fn test_with_output_format() {
        let builder = ConverterBuilder::new().with_output_format(OutputFormat::Json);
        assert_eq!(builder.config.output_format, OutputFormat::Json);

        let builder = ConverterBuilder::new().with_output_format(OutputFormat::Csv);
        assert_eq!(builder.config.output_format, OutputFormat::Csv);
    }

    #[test]
    fn test_build_success() {
        let result = ConverterBuilder::new().build();
        assert!(result.is_ok());
    }

    #[test]
    fn test_build_with_invalid_range_row() {
        let result = ConverterBuilder::new().with_range((10, 0), (0, 0)).build();
        assert!(result.is_err());
        match result {
            Err(XlsxToMdError::Config(msg)) => {
                assert!(msg.contains("start row"));
            }
            _ => panic!("Expected Config error"),
        }
    }

    #[test]
    fn test_build_with_invalid_range_col() {
        let result = ConverterBuilder::new().with_range((0, 10), (0, 0)).build();
        assert!(result.is_err());
        match result {
            Err(XlsxToMdError::Config(msg)) => {
                assert!(msg.contains("start col"));
            }
            _ => panic!("Expected Config error"),
        }
    }

    #[test]
    fn test_build_with_valid_custom_date_format() {
        let result = ConverterBuilder::new()
            .with_date_format(DateFormat::Custom("%Y-%m-%d".to_string()))
            .build();
        assert!(result.is_ok());
    }

    /// An empty custom format renders to an empty string and is rejected.
    #[test]
    fn test_build_with_invalid_custom_date_format() {
        let result = ConverterBuilder::new()
            .with_date_format(DateFormat::Custom("".to_string()))
            .build();
        assert!(result.is_err());
        match result {
            Err(XlsxToMdError::Config(msg)) => {
                assert!(msg.contains("Invalid date format"));
            }
            _ => panic!("Expected Config error"),
        }
    }

    #[test]
    fn test_builder_method_chaining() {
        let builder = ConverterBuilder::new()
            .with_sheet_selector(SheetSelector::Index(0))
            .with_merge_strategy(MergeStrategy::HtmlFallback)
            .with_date_format(DateFormat::Iso8601)
            .with_formula_mode(FormulaMode::Formula)
            .include_hidden(true)
            .with_range((0, 0), (10, 5))
            .with_output_format(OutputFormat::Html);
        assert!(matches!(
            builder.config.sheet_selector,
            SheetSelector::Index(0)
        ));
        assert_eq!(builder.config.merge_strategy, MergeStrategy::HtmlFallback);
        assert_eq!(builder.config.date_format, DateFormat::Iso8601);
        assert_eq!(builder.config.formula_mode, FormulaMode::Formula);
        assert!(builder.config.include_hidden);
        assert!(builder.config.range.is_some());
        assert_eq!(builder.config.output_format, OutputFormat::Html);
    }

    #[test]
    fn test_build_with_all_settings() {
        let result = ConverterBuilder::new()
            .with_sheet_selector(SheetSelector::Name("Sheet1".to_string()))
            .with_merge_strategy(MergeStrategy::DataDuplication)
            .with_date_format(DateFormat::Custom("%Y/%m/%d".to_string()))
            .with_formula_mode(FormulaMode::CachedValue)
            .include_hidden(false)
            .with_range((0, 0), (99, 9))
            .with_output_format(OutputFormat::Markdown)
            .build();
        assert!(result.is_ok());
    }

    #[test]
    fn test_converter_new() {
        let _converter = ConverterBuilder::new().build().unwrap();
    }

    /// Empty input is not a valid workbook, so conversion must fail rather
    /// than produce output.
    #[test]
    fn test_converter_convert_to_string_with_invalid_input() {
        let converter = ConverterBuilder::new().build().unwrap();
        let invalid_input: Vec<u8> = vec![];
        let result = converter.convert_to_string(std::io::Cursor::new(invalid_input));
        assert!(result.is_err());
    }
}