table_extractor/
lib.rs

1pub mod detector;
2pub mod error;
3pub mod parser;
4pub mod writer;
5
6use error::Result;
7use std::io::Write;
8use std::str::FromStr;
9
/// Upper bound on how many columns a single table may declare.
///
/// [`Table::new_validated`] rejects any header row wider than this, which guards
/// against out-of-memory attacks via excessively wide tables.
const MAX_COLUMNS: usize = 10_000;
13
/// A parsed table: one header row plus zero or more data rows.
///
/// Every data row is expected to hold exactly as many cells as there are
/// headers. Both fields are public, so the type itself cannot enforce that;
/// build tables with [`Table::new_validated`], or call [`Table::validate`],
/// whenever the invariant matters.
///
/// `Table` implements `Eq` (its fields are plain strings, so equality is total)
/// and `Default`, which yields a table with no headers and no rows.
///
/// # Examples
///
/// ```
/// use table_extractor::Table;
///
/// let table = Table::new(
///     vec!["id".to_string(), "name".to_string()],
///     vec![
///         vec!["1".to_string(), "Alice".to_string()],
///         vec!["2".to_string(), "Bob".to_string()],
///     ],
/// );
///
/// assert_eq!(table.column_count(), 2);
/// assert!(!table.is_empty());
///
/// // The default table has neither headers nor rows.
/// assert_eq!(Table::default(), Table::new(vec![], vec![]));
/// ```
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct Table {
    /// Column headers.
    pub headers: Vec<String>,

    /// Data rows; each row should have the same length as `headers`.
    pub rows: Vec<Vec<String>>,
}
43
44impl Table {
45    /// Creates a new table without validation.
46    ///
47    /// For safer construction with automatic validation, use [`Table::new_validated`].
48    ///
49    /// # Examples
50    ///
51    /// ```
52    /// use table_extractor::Table;
53    ///
54    /// let table = Table::new(
55    ///     vec!["id".to_string(), "name".to_string()],
56    ///     vec![vec!["1".to_string(), "Alice".to_string()]],
57    /// );
58    /// ```
59    pub fn new(headers: Vec<String>, rows: Vec<Vec<String>>) -> Self {
60        Self { headers, rows }
61    }
62
63    /// Validates that all rows have the same number of columns as headers.
64    ///
65    /// # Errors
66    ///
67    /// Returns [`error::Error::InconsistentColumns`] if any row has a different
68    /// column count than the header.
69    ///
70    /// # Examples
71    ///
72    /// ```
73    /// use table_extractor::Table;
74    ///
75    /// let table = Table::new(
76    ///     vec!["id".to_string(), "name".to_string()],
77    ///     vec![
78    ///         vec!["1".to_string(), "Alice".to_string()],
79    ///         vec!["2".to_string(), "Bob".to_string()],
80    ///     ],
81    /// );
82    ///
83    /// assert!(table.validate().is_ok());
84    ///
85    /// // Table with inconsistent columns
86    /// let bad_table = Table::new(
87    ///     vec!["id".to_string(), "name".to_string()],
88    ///     vec![vec!["1".to_string()]], // Missing column!
89    /// );
90    ///
91    /// assert!(bad_table.validate().is_err());
92    /// ```
93    pub fn validate(&self) -> Result<()> {
94        let expected = self.headers.len();
95        for (idx, row) in self.rows.iter().enumerate() {
96            if row.len() != expected {
97                return Err(error::Error::InconsistentColumns {
98                    row: idx + 1,
99                    expected,
100                    found: row.len(),
101                });
102            }
103        }
104        Ok(())
105    }
106
107    /// Creates a new table and validates it.
108    ///
109    /// This is the recommended way to create a table as it ensures data integrity
110    /// by validating column counts and enforcing limits.
111    ///
112    /// # Errors
113    ///
114    /// Returns an error if:
115    /// - The number of columns exceeds 10,000 ([`error::Error::InvalidFormat`])
116    /// - Any row has a different column count than the header ([`error::Error::InconsistentColumns`])
117    ///
118    /// # Examples
119    ///
120    /// ```
121    /// use table_extractor::Table;
122    ///
123    /// // Valid table
124    /// let table = Table::new_validated(
125    ///     vec!["id".to_string(), "name".to_string()],
126    ///     vec![
127    ///         vec!["1".to_string(), "Alice".to_string()],
128    ///         vec!["2".to_string(), "Bob".to_string()],
129    ///     ],
130    /// );
131    /// assert!(table.is_ok());
132    ///
133    /// // Invalid table (inconsistent columns)
134    /// let bad_table = Table::new_validated(
135    ///     vec!["id".to_string(), "name".to_string()],
136    ///     vec![vec!["1".to_string()]], // Missing column!
137    /// );
138    /// assert!(bad_table.is_err());
139    /// ```
140    pub fn new_validated(headers: Vec<String>, rows: Vec<Vec<String>>) -> Result<Self> {
141        // Check column count limit
142        if headers.len() > MAX_COLUMNS {
143            return Err(error::Error::InvalidFormat(format!(
144                "Too many columns: {} (maximum: {})",
145                headers.len(),
146                MAX_COLUMNS
147            )));
148        }
149
150        let table = Self { headers, rows };
151        table.validate()?;
152        Ok(table)
153    }
154
155    /// Returns `true` if the table contains no data rows.
156    ///
157    /// Note: A table with headers but no data rows is considered empty.
158    ///
159    /// # Examples
160    ///
161    /// ```
162    /// use table_extractor::Table;
163    ///
164    /// let empty_table = Table::new(
165    ///     vec!["id".to_string(), "name".to_string()],
166    ///     vec![],
167    /// );
168    /// assert!(empty_table.is_empty());
169    ///
170    /// let table_with_data = Table::new(
171    ///     vec!["id".to_string(), "name".to_string()],
172    ///     vec![vec!["1".to_string(), "Alice".to_string()]],
173    /// );
174    /// assert!(!table_with_data.is_empty());
175    /// ```
176    pub fn is_empty(&self) -> bool {
177        self.rows.is_empty()
178    }
179
180    /// Returns the number of columns in the table.
181    ///
182    /// This is equivalent to the length of the headers vector.
183    ///
184    /// # Examples
185    ///
186    /// ```
187    /// use table_extractor::Table;
188    ///
189    /// let table = Table::new(
190    ///     vec!["id".to_string(), "name".to_string(), "email".to_string()],
191    ///     vec![],
192    /// );
193    /// assert_eq!(table.column_count(), 3);
194    /// ```
195    pub fn column_count(&self) -> usize {
196        self.headers.len()
197    }
198}
199
/// Supported table formats for parsing and auto-detection.
///
/// A format can be auto-detected or named explicitly. Parsing a name with
/// `FromStr` is case-insensitive and accepts the aliases `md`, `postgres` and
/// `psql`. `Display` always writes the canonical lowercase name, which parses
/// back to the same variant, and honors width, fill, alignment and precision
/// flags like a plain string does.
///
/// # Examples
///
/// ```
/// use table_extractor::Format;
/// use std::str::FromStr;
///
/// // Parse format from string
/// let format = Format::from_str("markdown").unwrap();
/// assert_eq!(format, Format::Markdown);
///
/// // Case insensitive
/// let format = Format::from_str("MySQL").unwrap();
/// assert_eq!(format, Format::MySQL);
///
/// // Aliases are supported
/// let format = Format::from_str("psql").unwrap();
/// assert_eq!(format, Format::PostgreSQL);
///
/// // Display converts back to the canonical string
/// assert_eq!(format.to_string(), "postgresql");
/// assert_eq!(Format::CSV.to_string(), "csv");
///
/// // Width and alignment flags are honored
/// assert_eq!(format!("{:>6}", Format::CSV), "   csv");
///
/// // Round-trip conversion works
/// let original = Format::Markdown;
/// let parsed = Format::from_str(&original.to_string()).unwrap();
/// assert_eq!(original, parsed);
/// ```
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum Format {
    /// Markdown table format with pipe delimiters (e.g., `| col1 | col2 |`)
    Markdown,

    /// MySQL CLI output format with box-drawing characters (e.g., `+----+----+`)
    MySQL,

    /// PostgreSQL CLI output format with simple separators (e.g., `----+----`)
    PostgreSQL,

    /// Comma-separated values (CSV) format
    CSV,

    /// Tab-separated values (TSV) format
    TSV,
}

impl FromStr for Format {
    type Err = String;

    /// Parses a format name, ignoring case.
    ///
    /// Accepted names: `markdown`/`md`, `mysql`, `postgres`/`postgresql`/`psql`,
    /// `csv`, `tsv`.
    ///
    /// # Errors
    ///
    /// Returns a human-readable message that echoes the rejected input (as
    /// given, not lowercased) when the name is not recognized.
    fn from_str(s: &str) -> std::result::Result<Self, Self::Err> {
        let normalized = s.to_lowercase();
        let format = match normalized.as_str() {
            "markdown" | "md" => Format::Markdown,
            "mysql" => Format::MySQL,
            "postgres" | "postgresql" | "psql" => Format::PostgreSQL,
            "csv" => Format::CSV,
            "tsv" => Format::TSV,
            _ => {
                return Err(format!(
                    "Invalid format: '{}'. Valid formats: markdown, mysql, postgres, csv, tsv",
                    s
                ))
            }
        };
        Ok(format)
    }
}

impl std::fmt::Display for Format {
    /// Writes the canonical lowercase name of the format.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let name = match self {
            Format::Markdown => "markdown",
            Format::MySQL => "mysql",
            Format::PostgreSQL => "postgresql",
            Format::CSV => "csv",
            Format::TSV => "tsv",
        };
        // `pad` applies the caller's width, fill, alignment and precision;
        // `write!(f, "{}", name)` would silently discard them.
        f.pad(name)
    }
}
280
281/// Trait for parsing table data from various input formats.
282///
283/// Implement this trait to add support for new table formats.
284/// All parsers should validate column consistency by using [`Table::new_validated`].
285///
286/// # Examples
287///
288/// ```
289/// use table_extractor::{Parser, Table, error::Result};
290///
291/// struct CustomParser;
292///
293/// impl Parser for CustomParser {
294///     fn parse(&self, input: &str) -> Result<Table> {
295///         // Parse input into headers and rows
296///         let headers = vec!["col1".to_string(), "col2".to_string()];
297///         let rows = vec![vec!["val1".to_string(), "val2".to_string()]];
298///
299///         // Use new_validated to ensure data integrity
300///         Table::new_validated(headers, rows)
301///     }
302/// }
303/// ```
304pub trait Parser {
305    /// Parses the input string into a table.
306    ///
307    /// # Errors
308    ///
309    /// Returns an error if the input cannot be parsed or if the resulting
310    /// table fails validation (inconsistent columns, too many columns, etc.).
311    fn parse(&self, input: &str) -> Result<Table>;
312}
313
314/// Trait for writing table data to various output formats.
315///
316/// Implement this trait to add support for new output formats.
317///
318/// # Examples
319///
320/// ```
321/// use table_extractor::{Writer, Table, error::Result};
322/// use std::io::Write as IoWrite;
323///
324/// struct CustomWriter;
325///
326/// impl Writer for CustomWriter {
327///     fn write(&self, table: &Table, output: &mut dyn IoWrite) -> Result<()> {
328///         // Write headers
329///         writeln!(output, "{}", table.headers.join(","))?;
330///
331///         // Write rows
332///         for row in &table.rows {
333///             writeln!(output, "{}", row.join(","))?;
334///         }
335///
336///         Ok(())
337///     }
338/// }
339/// ```
340pub trait Writer {
341    /// Writes the table to the provided output stream.
342    ///
343    /// # Errors
344    ///
345    /// Returns an error if writing fails or if the table data is invalid
346    /// for the output format (e.g., delimiter conflicts).
347    fn write(&self, table: &Table, output: &mut dyn Write) -> Result<()>;
348}
349
#[cfg(test)]
mod tests {
    use super::*;

    /// Converts string literals into the owned cells a `Table` stores.
    fn cells(values: &[&str]) -> Vec<String> {
        values.iter().map(|&value| value.to_owned()).collect()
    }

    /// Produces `count` distinct header names: `col0`, `col1`, ...
    fn numbered_headers(count: usize) -> Vec<String> {
        (0..count).map(|i| format!("col{}", i)).collect()
    }

    /// Two complete rows under an `id`/`name` header.
    fn consistent_rows() -> Vec<Vec<String>> {
        vec![cells(&["1", "Alice"]), cells(&["2", "Bob"])]
    }

    /// Same as `consistent_rows`, but the second row is missing a column.
    fn ragged_rows() -> Vec<Vec<String>> {
        vec![cells(&["1", "Alice"]), cells(&["2"])]
    }

    #[test]
    fn test_validate_consistent_columns() {
        let table = Table::new(cells(&["id", "name"]), consistent_rows());
        assert!(table.validate().is_ok());
    }

    #[test]
    fn test_validate_inconsistent_columns() {
        let table = Table::new(cells(&["id", "name"]), ragged_rows());
        let result = table.validate();
        assert!(result.is_err());
        match result {
            Err(error::Error::InconsistentColumns {
                row,
                expected,
                found,
            }) => {
                assert_eq!(row, 2);
                assert_eq!(expected, 2);
                assert_eq!(found, 1);
            }
            _ => panic!("Expected InconsistentColumns error"),
        }
    }

    #[test]
    fn test_new_validated_success() {
        let result = Table::new_validated(cells(&["id", "name"]), consistent_rows());
        assert!(result.is_ok());
    }

    #[test]
    fn test_new_validated_fails_on_inconsistent_columns() {
        let result = Table::new_validated(cells(&["id", "name"]), ragged_rows());
        assert!(result.is_err());
    }

    #[test]
    fn test_validate_empty_table() {
        let table = Table::new(Vec::new(), Vec::new());
        assert!(table.validate().is_ok());
    }

    #[test]
    fn test_validate_no_rows() {
        let table = Table::new(cells(&["id", "name"]), Vec::new());
        assert!(table.validate().is_ok());
    }

    #[test]
    fn test_new_validated_rejects_too_many_columns() {
        // One column over the limit.
        let result = Table::new_validated(numbered_headers(10001), Vec::new());
        assert!(result.is_err());
        match result {
            Err(error::Error::InvalidFormat(msg)) => {
                assert!(msg.contains("Too many columns"));
                assert!(msg.contains("10001"));
                assert!(msg.contains("10000"));
            }
            _ => panic!("Expected InvalidFormat error"),
        }
    }

    #[test]
    fn test_new_validated_accepts_max_columns() {
        // Exactly at the limit is still allowed.
        let result = Table::new_validated(numbered_headers(10000), Vec::new());
        assert!(result.is_ok());
    }

    #[test]
    fn test_new_validated_accepts_just_under_max() {
        let result = Table::new_validated(numbered_headers(9999), Vec::new());
        assert!(result.is_ok());
    }

    #[test]
    fn test_format_display() {
        // Every variant and the canonical name it must display as.
        let expectations = [
            (Format::Markdown, "markdown"),
            (Format::MySQL, "mysql"),
            (Format::PostgreSQL, "postgresql"),
            (Format::CSV, "csv"),
            (Format::TSV, "tsv"),
        ];

        for (format, name) in expectations {
            assert_eq!(format.to_string(), name);
        }
    }

    #[test]
    fn test_format_display_roundtrip() {
        // Displaying a format and parsing the text must give the format back.
        let formats = [
            Format::Markdown,
            Format::MySQL,
            Format::PostgreSQL,
            Format::CSV,
            Format::TSV,
        ];

        for format in formats {
            let string = format.to_string();
            let parsed = string.parse::<Format>().unwrap();
            assert_eq!(format, parsed, "Round-trip failed for {}", string);
        }
    }
}