csvmd/
lib.rs

1//! A library for converting CSV data to Markdown tables.
2//!
3//! This crate provides functionality to convert CSV (Comma-Separated Values) data
4//! into properly formatted Markdown tables. It handles various CSV complexities
5//! including quoted fields, embedded newlines, and pipe characters.
6//!
7//! # Features
8//!
9//! - Stream processing for memory efficiency with large files
10//! - Proper escaping of Markdown special characters
11//! - Support for uneven column counts across rows
12//! - Comprehensive error handling
13//!
14//! # Example
15//!
16//! ```rust
17//! use csvmd::{csv_to_markdown, Config};
18//! use std::io::Cursor;
19//!
20//! let csv_data = "Name,Age\nJohn,25\nJane,30";
21//! let input = Cursor::new(csv_data);
22//! let config = Config::default();
23//! let result = csv_to_markdown(input, config).unwrap();
24//! println!("{}", result);
25//! ```
26
27pub mod error;
28
29use csv::ReaderBuilder;
30use error::Result;
31use std::fmt::Write as FmtWrite;
32use std::io::{Read, Seek, SeekFrom, Write};
33
/// Alignment marker style used for the Markdown header separator row.
///
/// The chosen variant only decides which marker is repeated in the separator
/// line that follows the header row.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum HeaderAlignment {
    /// Left alignment, written as `| --- |`. This is what `Config::default()` selects.
    Left,
    /// Center alignment, written as `| :---: |`.
    Center,
    /// Right alignment, written as `| ---: |`.
    Right,
}
44
45/// Configuration for CSV to Markdown conversion.
46#[derive(Debug, Clone)]
47pub struct Config {
48    /// Whether the CSV has headers (affects separator line placement).
49    pub has_headers: bool,
50    /// Whether to allow flexible column counts.
51    pub flexible: bool,
52    /// CSV field delimiter character.
53    pub delimiter: u8,
54    /// Header alignment for Markdown table.
55    pub header_alignment: HeaderAlignment,
56}
57
58impl Default for Config {
59    fn default() -> Self {
60        Self {
61            has_headers: true,
62            flexible: true,
63            delimiter: b',',
64            header_alignment: HeaderAlignment::Left,
65        }
66    }
67}
68
69/// Convert CSV data to a Markdown table string.
70///
71/// This function reads CSV data from the provided reader and converts it to
72/// a Markdown table format. It processes the data in a streaming fashion to
73/// handle large files efficiently.
74///
75/// # Arguments
76///
77/// * `input` - A reader containing CSV data
78/// * `config` - Configuration options for the conversion
79///
80/// # Returns
81///
82/// A string containing the formatted Markdown table.
83///
84/// # Errors
85///
86/// Returns `CsvMdError` if:
87/// - The input cannot be read
88/// - The CSV data is malformed
89/// - Memory allocation fails during processing
90///
91/// # Example
92///
93/// ```rust
94/// use csvmd::{csv_to_markdown, Config};
95/// use std::io::Cursor;
96///
97/// let csv_data = "Name,Age\nJohn,25\nJane,30";
98/// let input = Cursor::new(csv_data);
99/// let config = Config::default();
100/// let result = csv_to_markdown(input, config)?;
101/// assert!(result.contains("| Name | Age |"));
102/// # Ok::<(), csvmd::error::CsvMdError>(())
103/// ```
104pub fn csv_to_markdown<R: Read>(input: R, config: Config) -> Result<String> {
105    let mut reader = ReaderBuilder::new()
106        .has_headers(false)
107        .flexible(config.flexible)
108        .delimiter(config.delimiter)
109        .from_reader(input);
110
111    let mut records: Vec<Vec<String>> = Vec::new();
112    let mut max_cols = 0;
113
114    // First pass: collect all records and determine max column count
115    for result in reader.records() {
116        let record = result?;
117        let row: Vec<String> = record.iter().map(escape_markdown_cell).collect();
118
119        max_cols = max_cols.max(row.len());
120        records.push(row);
121    }
122
123    if records.is_empty() {
124        return Ok(String::new());
125    }
126
127    // Estimate output size to reduce allocations
128    let estimated_size = estimate_output_size(&records, max_cols);
129    let mut output = String::with_capacity(estimated_size);
130
131    // Write the table
132    for (i, record) in records.iter().enumerate() {
133        write_table_row(&mut output, record, max_cols)?;
134
135        // Add header separator after first row if configured
136        if i == 0 && config.has_headers {
137            write_header_separator(&mut output, max_cols, config.header_alignment)?;
138        }
139    }
140
141    Ok(output)
142}
143
144/// Convert CSV data to Markdown and write directly to output.
145///
146/// This streaming version uses a two-pass approach:
147/// 1. First pass: determine the maximum column count
148/// 2. Second pass: stream output with correct table formatting
149///
150/// This provides memory efficiency for large files while ensuring correct
151/// Markdown table structure.
152///
153/// # Arguments
154///
155/// * `input` - A reader containing CSV data
156/// * `output` - A writer where the Markdown table will be written
157/// * `config` - Configuration options for the conversion
158///
159/// # Errors
160///
161/// Returns `CsvMdError` if reading, parsing, or writing fails.
162pub fn csv_to_markdown_streaming<R: Read, W: Write>(
163    mut input: R,
164    mut output: W,
165    config: Config,
166) -> Result<()> {
167    // First, we need to read the input to determine max columns
168    // Since we need to read twice, we'll read all data into memory first
169    let mut buffer = Vec::new();
170    input.read_to_end(&mut buffer)?;
171
172    // First pass: determine max column count
173    let max_cols = {
174        let cursor = std::io::Cursor::new(&buffer);
175        let mut reader = ReaderBuilder::new()
176            .has_headers(false)
177            .flexible(config.flexible)
178            .delimiter(config.delimiter)
179            .from_reader(cursor);
180
181        let mut max_cols = 0;
182        for result in reader.records() {
183            let record = result?;
184            max_cols = max_cols.max(record.len());
185        }
186        max_cols
187    };
188
189    // Second pass: stream output with correct column count
190    let cursor = std::io::Cursor::new(&buffer);
191    let mut reader = ReaderBuilder::new()
192        .has_headers(false)
193        .flexible(config.flexible)
194        .delimiter(config.delimiter)
195        .from_reader(cursor);
196
197    let mut first_row = true;
198
199    for result in reader.records() {
200        let record = result?;
201        let row: Vec<String> = record.iter().map(escape_markdown_cell).collect();
202
203        // Write the row with correct column count
204        write_table_row_to_writer(&mut output, &row, max_cols)?;
205
206        // Add header separator after first row if configured
207        if first_row && config.has_headers {
208            write_header_separator_to_writer(&mut output, max_cols, config.header_alignment)?;
209            first_row = false;
210        }
211    }
212
213    output.flush()?;
214    Ok(())
215}
216
217/// Convert CSV data to Markdown and write directly to output using a seekable input.
218///
219/// For seekable inputs (e.g., files), this variant avoids buffering the entire
220/// input into memory. It performs two passes by rewinding the reader between
221/// passes to compute the maximum column count and then write the output.
222pub fn csv_to_markdown_streaming_seekable<R: Read + Seek, W: Write>(
223    mut input: R,
224    mut output: W,
225    config: Config,
226) -> Result<()> {
227    // First pass: determine max column count
228    input.seek(SeekFrom::Start(0))?;
229    let mut reader = ReaderBuilder::new()
230        .has_headers(false)
231        .flexible(config.flexible)
232        .delimiter(config.delimiter)
233        .from_reader(&mut input);
234
235    let mut max_cols = 0;
236    for result in reader.records() {
237        let record = result?;
238        max_cols = max_cols.max(record.len());
239    }
240
241    // Second pass: rewind and stream output with correct column count
242    drop(reader);
243    input.seek(SeekFrom::Start(0))?;
244    let mut reader = ReaderBuilder::new()
245        .has_headers(false)
246        .flexible(config.flexible)
247        .delimiter(config.delimiter)
248        .from_reader(&mut input);
249
250    let mut first_row = true;
251    for result in reader.records() {
252        let record = result?;
253        let row: Vec<String> = record.iter().map(escape_markdown_cell).collect();
254        write_table_row_to_writer(&mut output, &row, max_cols)?;
255
256        if first_row && config.has_headers {
257            write_header_separator_to_writer(&mut output, max_cols, config.header_alignment)?;
258            first_row = false;
259        }
260    }
261
262    output.flush()?;
263    Ok(())
264}
265
/// Make a CSV field safe to place inside a Markdown table cell.
///
/// The field is rewritten character by character:
/// - `|` becomes `\|`, so it is not read as a column delimiter
/// - `\n` becomes `<br>`, so the row stays on a single line
/// - `\r` is dropped
///
/// All other characters are copied through unchanged.
fn escape_markdown_cell(field: &str) -> String {
    // The result is at least as long as the input unless `\r` is dropped, so
    // the input length is a reasonable starting capacity.
    let mut escaped = String::with_capacity(field.len());

    for ch in field.chars() {
        match ch {
            '|' => escaped.push_str("\\|"),
            '\n' => escaped.push_str("<br>"),
            '\r' => {}
            other => escaped.push(other),
        }
    }

    escaped
}
278
279/// Write a table row to a string buffer.
280fn write_table_row(output: &mut String, row: &[String], max_cols: usize) -> Result<()> {
281    output.push('|');
282
283    for i in 0..max_cols {
284        let cell = row.get(i).map(String::as_str).unwrap_or("");
285        write!(output, " {} |", cell)?;
286    }
287
288    output.push('\n');
289    Ok(())
290}
291
292/// Write a table row directly to a writer.
293fn write_table_row_to_writer<W: Write>(
294    output: &mut W,
295    row: &[String],
296    max_cols: usize,
297) -> Result<()> {
298    write!(output, "|")?;
299
300    for i in 0..max_cols {
301        let cell = row.get(i).map(String::as_str).unwrap_or("");
302        write!(output, " {} |", cell)?;
303    }
304
305    writeln!(output)?;
306    Ok(())
307}
308
309/// Write the header separator line to a string buffer.
310fn write_header_separator(
311    output: &mut String,
312    max_cols: usize,
313    alignment: HeaderAlignment,
314) -> Result<()> {
315    output.push('|');
316
317    let separator = match alignment {
318        HeaderAlignment::Left => " --- |",
319        HeaderAlignment::Center => " :---: |",
320        HeaderAlignment::Right => " ---: |",
321    };
322
323    for _ in 0..max_cols {
324        output.push_str(separator);
325    }
326
327    output.push('\n');
328    Ok(())
329}
330
331/// Write the header separator line directly to a writer.
332fn write_header_separator_to_writer<W: Write>(
333    output: &mut W,
334    max_cols: usize,
335    alignment: HeaderAlignment,
336) -> Result<()> {
337    write!(output, "|")?;
338
339    let separator = match alignment {
340        HeaderAlignment::Left => " --- |",
341        HeaderAlignment::Center => " :---: |",
342        HeaderAlignment::Right => " ---: |",
343    };
344
345    for _ in 0..max_cols {
346        write!(output, "{}", separator)?;
347    }
348
349    writeln!(output)?;
350    Ok(())
351}
352
/// Estimate the rendered table size in bytes, for pre-allocating the output string.
///
/// The estimate is the sum of:
/// - the byte length of every (already escaped) cell in `records`;
/// - per row, a leading `|`, a trailing newline, and three bytes of framing
///   (one space before the cell, a space and `|` after it) for each of the
///   `max_cols` columns, which also covers padded empty cells;
/// - one separator line sized for the widest marker, `" :---: |"`.
///
/// As long as no row holds more than `max_cols` cells this is an upper bound
/// on the rendered size, and it is exact for a center-aligned table with a
/// header. The previous formula divided the total cell bytes by the number of
/// rows and then multiplied by `max_cols * rows`, over-reserving by roughly a
/// factor of `max_cols`.
///
/// Saturating arithmetic is used so that extreme dimensions cannot overflow.
fn estimate_output_size(records: &[Vec<String>], max_cols: usize) -> usize {
    /// Bytes added around each cell: `" "` before and `" |"` after.
    const CELL_FRAMING: usize = 3;
    /// Bytes added once per line: the leading `|` and the trailing newline.
    const LINE_FRAMING: usize = 2;
    /// Length of the longest separator marker (center alignment).
    const WIDEST_MARKER: usize = " :---: |".len();

    let content_bytes = records
        .iter()
        .flatten()
        .map(String::len)
        .fold(0usize, usize::saturating_add);

    let framing_per_row = max_cols
        .saturating_mul(CELL_FRAMING)
        .saturating_add(LINE_FRAMING);
    let framing_bytes = framing_per_row.saturating_mul(records.len());

    let separator_bytes = max_cols
        .saturating_mul(WIDEST_MARKER)
        .saturating_add(LINE_FRAMING);

    content_bytes
        .saturating_add(framing_bytes)
        .saturating_add(separator_bytes)
}
365
#[cfg(test)]
mod tests {
    //! Unit tests for the three conversion entry points and the cell escaper.

    use super::*;
    use std::io::Cursor;

    /// Render `csv_data` with the in-memory converter.
    fn render(csv_data: &str, config: Config) -> String {
        csv_to_markdown(Cursor::new(csv_data), config).unwrap()
    }

    /// Render `csv_data` with the buffering streaming converter.
    fn render_streaming(csv_data: &str, config: Config) -> String {
        let mut output = Vec::new();
        csv_to_markdown_streaming(Cursor::new(csv_data), &mut output, config).unwrap();
        String::from_utf8(output).unwrap()
    }

    /// Render `csv_data` with the seekable streaming converter.
    fn render_seekable(csv_data: &str, config: Config) -> String {
        let mut output = Vec::new();
        csv_to_markdown_streaming_seekable(Cursor::new(csv_data), &mut output, config).unwrap();
        String::from_utf8(output).unwrap()
    }

    #[test]
    fn test_escape_markdown_cell() {
        assert_eq!(escape_markdown_cell("simple"), "simple");
        assert_eq!(escape_markdown_cell("with|pipe"), "with\\|pipe");
        assert_eq!(escape_markdown_cell("with\nlinebreak"), "with<br>linebreak");
        assert_eq!(escape_markdown_cell("with\r\nwindows"), "with<br>windows");
        assert_eq!(escape_markdown_cell(""), "");
    }

    #[test]
    fn test_simple_csv() {
        let result = render("Name,Age\nJohn,25\nJane,30", Config::default());

        let expected = "| Name | Age |\n| --- | --- |\n| John | 25 |\n| Jane | 30 |\n";
        assert_eq!(result, expected);
    }

    #[test]
    fn test_empty_csv() {
        let result = render("", Config::default());

        assert_eq!(result, "");
    }

    #[test]
    fn test_csv_with_line_breaks() {
        let csv_data = "Name,Description\nJohn,\"Line 1\nLine 2\"";
        let result = render(csv_data, Config::default());

        let expected = "| Name | Description |\n| --- | --- |\n| John | Line 1<br>Line 2 |\n";
        assert_eq!(result, expected);
    }

    #[test]
    fn test_csv_with_pipes() {
        let csv_data = "Name,Description\nJohn,\"Has | pipe\"";
        let result = render(csv_data, Config::default());

        let expected = "| Name | Description |\n| --- | --- |\n| John | Has \\| pipe |\n";
        assert_eq!(result, expected);
    }

    #[test]
    fn test_csv_with_uneven_columns() {
        let result = render("A,B,C\nX,Y\nP,Q,R,S", Config::default());

        let expected =
            "| A | B | C |  |\n| --- | --- | --- | --- |\n| X | Y |  |  |\n| P | Q | R | S |\n";
        assert_eq!(result, expected);
    }

    #[test]
    fn test_single_row_csv() {
        let result = render("Name,Age,City", Config::default());

        let expected = "| Name | Age | City |\n| --- | --- | --- |\n";
        assert_eq!(result, expected);
    }

    #[test]
    fn test_csv_with_empty_cells() {
        let result = render("Name,Age,City\nJohn,,NYC\n,25,", Config::default());

        let expected =
            "| Name | Age | City |\n| --- | --- | --- |\n| John |  | NYC |\n|  | 25 |  |\n";
        assert_eq!(result, expected);
    }

    #[test]
    fn test_csv_with_quotes_and_commas() {
        let csv_data = "Name,Description\nJohn,\"Smith, Jr.\"\nJane,\"O'Connor\"";
        let result = render(csv_data, Config::default());

        let expected =
            "| Name | Description |\n| --- | --- |\n| John | Smith, Jr. |\n| Jane | O'Connor |\n";
        assert_eq!(result, expected);
    }

    #[test]
    fn test_csv_with_special_characters() {
        let csv_data = "Symbol,Unicode\n★,\"U+2605\"\n♠,\"U+2660\"";
        let result = render(csv_data, Config::default());

        let expected = "| Symbol | Unicode |\n| --- | --- |\n| ★ | U+2605 |\n| ♠ | U+2660 |\n";
        assert_eq!(result, expected);
    }

    #[test]
    fn test_csv_with_whitespace() {
        // Surrounding whitespace in fields is kept as-is.
        let csv_data = " Name , Age \n John , 25 \n Jane , 30 ";
        let result = render(csv_data, Config::default());

        let expected = "|  Name  |  Age  |\n| --- | --- |\n|  John  |  25  |\n|  Jane  |  30  |\n";
        assert_eq!(result, expected);
    }

    #[test]
    fn test_no_headers_config() {
        let config = Config {
            has_headers: false,
            ..Config::default()
        };
        let result = render("Data1,Data2\nValue1,Value2", config);

        // Should not have separator line when no headers
        let expected = "| Data1 | Data2 |\n| Value1 | Value2 |\n";
        assert_eq!(result, expected);
    }

    #[test]
    fn test_custom_delimiter() {
        let config = Config {
            delimiter: b';',
            ..Config::default()
        };
        let result = render("Name;Age\nJohn;25\nJane;30", config);

        let expected = "| Name | Age |\n| --- | --- |\n| John | 25 |\n| Jane | 30 |\n";
        assert_eq!(result, expected);
    }

    #[test]
    fn test_inflexible_rejects_uneven_columns() {
        // With `flexible: false` the CSV reader reports rows of differing
        // length as an error instead of padding them.
        let config = Config {
            flexible: false,
            ..Config::default()
        };
        let result = csv_to_markdown(Cursor::new("A,B\nX,Y,Z"), config);

        assert!(result.is_err());
    }

    #[test]
    fn test_streaming_mode() {
        let result = render_streaming("Name,Age\nJohn,25\nJane,30", Config::default());

        let expected = "| Name | Age |\n| --- | --- |\n| John | 25 |\n| Jane | 30 |\n";
        assert_eq!(result, expected);
    }

    #[test]
    fn test_streaming_mode_empty_csv() {
        let result = render_streaming("", Config::default());

        assert_eq!(result, "");
    }

    #[test]
    fn test_streaming_mode_no_headers() {
        let config = Config {
            has_headers: false,
            ..Config::default()
        };
        let result = render_streaming("Data1,Data2\nValue1,Value2", config);

        let expected = "| Data1 | Data2 |\n| Value1 | Value2 |\n";
        assert_eq!(result, expected);
    }

    #[test]
    fn test_streaming_mode_uneven_columns() {
        // Regression test: later rows are wider than the first one, so the
        // column count must be known before the first row is written.
        let result = render_streaming("A,B\nX,Y,Z\nP,Q,R,S", Config::default());

        // All rows should have 4 columns (max from any row)
        let expected =
            "| A | B |  |  |\n| --- | --- | --- | --- |\n| X | Y | Z |  |\n| P | Q | R | S |\n";
        assert_eq!(result, expected);
    }

    #[test]
    fn test_seekable_mode() {
        let result = render_seekable("Name,Age\nJohn,25\nJane,30", Config::default());

        let expected = "| Name | Age |\n| --- | --- |\n| John | 25 |\n| Jane | 30 |\n";
        assert_eq!(result, expected);
    }

    #[test]
    fn test_seekable_mode_empty_csv() {
        let result = render_seekable("", Config::default());

        assert_eq!(result, "");
    }

    #[test]
    fn test_seekable_mode_no_headers() {
        let config = Config {
            has_headers: false,
            ..Config::default()
        };
        let result = render_seekable("Data1,Data2\nValue1,Value2", config);

        let expected = "| Data1 | Data2 |\n| Value1 | Value2 |\n";
        assert_eq!(result, expected);
    }

    #[test]
    fn test_seekable_mode_uneven_columns() {
        let result = render_seekable("A,B\nX,Y,Z\nP,Q,R,S", Config::default());

        let expected =
            "| A | B |  |  |\n| --- | --- | --- | --- |\n| X | Y | Z |  |\n| P | Q | R | S |\n";
        assert_eq!(result, expected);
    }

    #[test]
    fn test_seekable_mode_rewinds_consumed_input() {
        // The converter seeks to the start itself, so a reader that has
        // already been read to the end still yields the full table.
        let csv_data = "Name,Age\nJohn,25\nJane,30";
        let mut input = Cursor::new(csv_data);
        input.set_position(csv_data.len() as u64);
        let mut output = Vec::new();

        csv_to_markdown_streaming_seekable(input, &mut output, Config::default()).unwrap();

        let result = String::from_utf8(output).unwrap();
        let expected = "| Name | Age |\n| --- | --- |\n| John | 25 |\n| Jane | 30 |\n";
        assert_eq!(result, expected);
    }

    #[test]
    fn test_all_modes_agree() {
        let csv_data = "Name,Note\nJohn,\"a | b\"\nJane,\"x\ny\",extra";
        let config = Config {
            header_alignment: HeaderAlignment::Right,
            ..Config::default()
        };

        let in_memory = render(csv_data, config.clone());
        assert_eq!(render_streaming(csv_data, config.clone()), in_memory);
        assert_eq!(render_seekable(csv_data, config), in_memory);
    }

    #[test]
    fn test_header_alignment_left() {
        let config = Config {
            header_alignment: HeaderAlignment::Left,
            ..Config::default()
        };
        let result = render("Name,Age\nJohn,25\nJane,30", config);

        let expected = "| Name | Age |\n| --- | --- |\n| John | 25 |\n| Jane | 30 |\n";
        assert_eq!(result, expected);
    }

    #[test]
    fn test_header_alignment_center() {
        let config = Config {
            header_alignment: HeaderAlignment::Center,
            ..Config::default()
        };
        let result = render("Name,Age\nJohn,25\nJane,30", config);

        let expected = "| Name | Age |\n| :---: | :---: |\n| John | 25 |\n| Jane | 30 |\n";
        assert_eq!(result, expected);
    }

    #[test]
    fn test_header_alignment_right() {
        let config = Config {
            header_alignment: HeaderAlignment::Right,
            ..Config::default()
        };
        let result = render("Name,Age\nJohn,25\nJane,30", config);

        let expected = "| Name | Age |\n| ---: | ---: |\n| John | 25 |\n| Jane | 30 |\n";
        assert_eq!(result, expected);
    }

    #[test]
    fn test_streaming_header_alignment_center() {
        let config = Config {
            header_alignment: HeaderAlignment::Center,
            ..Config::default()
        };
        let result = render_streaming("Name,Age\nJohn,25\nJane,30", config);

        let expected = "| Name | Age |\n| :---: | :---: |\n| John | 25 |\n| Jane | 30 |\n";
        assert_eq!(result, expected);
    }

    #[test]
    fn test_header_alignment_no_headers() {
        let config = Config {
            has_headers: false,
            header_alignment: HeaderAlignment::Center, // Should be ignored
            ..Config::default()
        };
        let result = render("Data1,Data2\nValue1,Value2", config);

        // Should not have separator line when no headers, regardless of alignment
        let expected = "| Data1 | Data2 |\n| Value1 | Value2 |\n";
        assert_eq!(result, expected);
    }
}