table_extractor/
lib.rs

1pub mod detector;
2pub mod error;
3pub mod parser;
4pub mod writer;
5
6use error::Result;
7use std::io::Write;
8use std::str::FromStr;
9
/// Upper bound on the number of columns a validated table may declare.
///
/// Guards against out-of-memory attacks that rely on excessively wide tables.
const MAX_COLUMNS: usize = 10_000;
13
/// A parsed table: one header row plus zero or more data rows.
///
/// Every data row is expected to have exactly as many cells as there are
/// headers. [`Table::new`] does not enforce this; use [`Table::new_validated`]
/// (or call [`Table::validate`] afterwards) when the invariant must hold.
///
/// Tables compare by value: two tables are equal when their headers and all
/// of their rows are equal. The comparison is a full equivalence relation
/// (`Eq`), since every cell is a plain `String`.
///
/// # Examples
///
/// ```
/// use table_extractor::Table;
///
/// let table = Table::new(
///     vec!["id".to_string(), "name".to_string()],
///     vec![
///         vec!["1".to_string(), "Alice".to_string()],
///         vec!["2".to_string(), "Bob".to_string()],
///     ],
/// );
///
/// assert_eq!(table.column_count(), 2);
/// assert!(!table.is_empty());
/// assert_eq!(table.clone(), table);
/// ```
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Table {
    /// Column headers; their count defines the table's width.
    headers: Vec<String>,

    /// Data rows; each row should have the same length as `headers`.
    rows: Vec<Vec<String>>,
}
43
44impl Table {
45    /// Creates a new table without validation.
46    ///
47    /// For safer construction with automatic validation, use [`Table::new_validated`].
48    ///
49    /// # Examples
50    ///
51    /// ```
52    /// use table_extractor::Table;
53    ///
54    /// let table = Table::new(
55    ///     vec!["id".to_string(), "name".to_string()],
56    ///     vec![vec!["1".to_string(), "Alice".to_string()]],
57    /// );
58    /// ```
59    pub fn new(headers: Vec<String>, rows: Vec<Vec<String>>) -> Self {
60        Self { headers, rows }
61    }
62
63    /// Validates that all rows have the same number of columns as headers.
64    ///
65    /// # Errors
66    ///
67    /// Returns [`error::Error::InconsistentColumns`] if any row has a different
68    /// column count than the header.
69    ///
70    /// # Examples
71    ///
72    /// ```
73    /// use table_extractor::Table;
74    ///
75    /// let table = Table::new(
76    ///     vec!["id".to_string(), "name".to_string()],
77    ///     vec![
78    ///         vec!["1".to_string(), "Alice".to_string()],
79    ///         vec!["2".to_string(), "Bob".to_string()],
80    ///     ],
81    /// );
82    ///
83    /// assert!(table.validate().is_ok());
84    ///
85    /// // Table with inconsistent columns
86    /// let bad_table = Table::new(
87    ///     vec!["id".to_string(), "name".to_string()],
88    ///     vec![vec!["1".to_string()]], // Missing column!
89    /// );
90    ///
91    /// assert!(bad_table.validate().is_err());
92    /// ```
93    pub fn validate(&self) -> Result<()> {
94        let expected = self.headers.len();
95        for (idx, row) in self.rows.iter().enumerate() {
96            if row.len() != expected {
97                return Err(error::Error::InconsistentColumns {
98                    row: idx + 1,
99                    expected,
100                    found: row.len(),
101                });
102            }
103        }
104        Ok(())
105    }
106
107    /// Creates a new table and validates it.
108    ///
109    /// This is the recommended way to create a table as it ensures data integrity
110    /// by validating column counts and enforcing limits.
111    ///
112    /// # Errors
113    ///
114    /// Returns an error if:
115    /// - The number of columns exceeds 10,000 ([`error::Error::InvalidFormat`])
116    /// - Any row has a different column count than the header ([`error::Error::InconsistentColumns`])
117    ///
118    /// # Examples
119    ///
120    /// ```
121    /// use table_extractor::Table;
122    ///
123    /// // Valid table
124    /// let table = Table::new_validated(
125    ///     vec!["id".to_string(), "name".to_string()],
126    ///     vec![
127    ///         vec!["1".to_string(), "Alice".to_string()],
128    ///         vec!["2".to_string(), "Bob".to_string()],
129    ///     ],
130    /// );
131    /// assert!(table.is_ok());
132    ///
133    /// // Invalid table (inconsistent columns)
134    /// let bad_table = Table::new_validated(
135    ///     vec!["id".to_string(), "name".to_string()],
136    ///     vec![vec!["1".to_string()]], // Missing column!
137    /// );
138    /// assert!(bad_table.is_err());
139    /// ```
140    pub fn new_validated(headers: Vec<String>, rows: Vec<Vec<String>>) -> Result<Self> {
141        // Check column count limit
142        if headers.len() > MAX_COLUMNS {
143            return Err(error::Error::InvalidFormat(format!(
144                "Too many columns: {} (maximum: {})",
145                headers.len(),
146                MAX_COLUMNS
147            )));
148        }
149
150        let table = Self { headers, rows };
151        table.validate()?;
152        Ok(table)
153    }
154
155    /// Returns `true` if the table contains no data rows.
156    ///
157    /// Note: A table with headers but no data rows is considered empty.
158    ///
159    /// # Examples
160    ///
161    /// ```
162    /// use table_extractor::Table;
163    ///
164    /// let empty_table = Table::new(
165    ///     vec!["id".to_string(), "name".to_string()],
166    ///     vec![],
167    /// );
168    /// assert!(empty_table.is_empty());
169    ///
170    /// let table_with_data = Table::new(
171    ///     vec!["id".to_string(), "name".to_string()],
172    ///     vec![vec!["1".to_string(), "Alice".to_string()]],
173    /// );
174    /// assert!(!table_with_data.is_empty());
175    /// ```
176    pub fn is_empty(&self) -> bool {
177        self.rows.is_empty()
178    }
179
180    /// Returns the number of columns in the table.
181    ///
182    /// This is equivalent to the length of the headers vector.
183    ///
184    /// # Examples
185    ///
186    /// ```
187    /// use table_extractor::Table;
188    ///
189    /// let table = Table::new(
190    ///     vec!["id".to_string(), "name".to_string(), "email".to_string()],
191    ///     vec![],
192    /// );
193    /// assert_eq!(table.column_count(), 3);
194    /// ```
195    pub fn column_count(&self) -> usize {
196        self.headers.len()
197    }
198
199    /// Returns a reference to the table headers.
200    ///
201    /// # Examples
202    ///
203    /// ```
204    /// use table_extractor::Table;
205    ///
206    /// let table = Table::new(
207    ///     vec!["id".to_string(), "name".to_string()],
208    ///     vec![],
209    /// );
210    /// assert_eq!(table.headers(), &["id", "name"]);
211    /// ```
212    pub fn headers(&self) -> &[String] {
213        &self.headers
214    }
215
216    /// Returns a reference to the table rows.
217    ///
218    /// # Examples
219    ///
220    /// ```
221    /// use table_extractor::Table;
222    ///
223    /// let table = Table::new(
224    ///     vec!["id".to_string(), "name".to_string()],
225    ///     vec![
226    ///         vec!["1".to_string(), "Alice".to_string()],
227    ///         vec!["2".to_string(), "Bob".to_string()],
228    ///     ],
229    /// );
230    /// assert_eq!(table.rows().len(), 2);
231    /// assert_eq!(table.rows()[0], vec!["1", "Alice"]);
232    /// ```
233    pub fn rows(&self) -> &[Vec<String>] {
234        &self.rows
235    }
236
237    /// Consumes the table and returns the headers and rows.
238    ///
239    /// This is useful when you need ownership of the table's data.
240    ///
241    /// # Examples
242    ///
243    /// ```
244    /// use table_extractor::Table;
245    ///
246    /// let table = Table::new(
247    ///     vec!["id".to_string(), "name".to_string()],
248    ///     vec![vec!["1".to_string(), "Alice".to_string()]],
249    /// );
250    ///
251    /// let (headers, rows) = table.into_parts();
252    /// assert_eq!(headers, vec!["id", "name"]);
253    /// assert_eq!(rows.len(), 1);
254    /// ```
255    pub fn into_parts(self) -> (Vec<String>, Vec<Vec<String>>) {
256        (self.headers, self.rows)
257    }
258}
259
/// Table formats supported for parsing and auto-detection.
///
/// A format can be auto-detected or named explicitly. Names are parsed
/// case-insensitively via [`FromStr`], and a few aliases are accepted
/// (`md`, `postgres`, `psql`). [`Display`](std::fmt::Display) yields the
/// canonical lowercase name, which always parses back to the same variant,
/// and honours width, fill and alignment specifiers.
///
/// # Examples
///
/// ```
/// use table_extractor::Format;
/// use std::str::FromStr;
///
/// // Parse format from string
/// let format = Format::from_str("markdown").unwrap();
/// assert_eq!(format, Format::Markdown);
///
/// // Case insensitive
/// let format = Format::from_str("MySQL").unwrap();
/// assert_eq!(format, Format::MySQL);
///
/// // Aliases are supported
/// let format = Format::from_str("psql").unwrap();
/// assert_eq!(format, Format::PostgreSQL);
///
/// // Display converts back to the canonical string
/// assert_eq!(format.to_string(), "postgresql");
/// assert_eq!(Format::CSV.to_string(), "csv");
///
/// // Width and alignment specifiers are respected
/// assert_eq!(format!("{:>5}", Format::CSV), "  csv");
///
/// // Round-trip conversion works
/// let original = Format::Markdown;
/// let parsed = Format::from_str(&original.to_string()).unwrap();
/// assert_eq!(original, parsed);
/// ```
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Format {
    /// Markdown table format with pipe delimiters (e.g., `| col1 | col2 |`)
    Markdown,

    /// MySQL CLI output format with box-drawing characters (e.g., `+----+----+`)
    MySQL,

    /// PostgreSQL CLI output format with simple separators (e.g., `----+----`)
    PostgreSQL,

    /// Comma-separated values (CSV) format
    CSV,

    /// Tab-separated values (TSV) format
    TSV,
}

impl FromStr for Format {
    type Err = String;

    /// Parses a format name, ignoring case.
    ///
    /// Accepted names: `markdown`/`md`, `mysql`,
    /// `postgres`/`postgresql`/`psql`, `csv`, `tsv`.
    ///
    /// # Errors
    ///
    /// Returns a human-readable message quoting the original input when the
    /// name is not recognised.
    fn from_str(s: &str) -> std::result::Result<Self, Self::Err> {
        let normalized = s.to_lowercase();
        let format = match normalized.as_str() {
            "markdown" | "md" => Format::Markdown,
            "mysql" => Format::MySQL,
            "postgres" | "postgresql" | "psql" => Format::PostgreSQL,
            "csv" => Format::CSV,
            "tsv" => Format::TSV,
            _ => {
                // Echo the caller's original spelling, not the lowercased copy.
                return Err(format!(
                    "Invalid format: '{}'. Valid formats: markdown, mysql, postgres, csv, tsv",
                    s
                ));
            }
        };
        Ok(format)
    }
}

impl std::fmt::Display for Format {
    /// Writes the canonical lowercase name of the format.
    ///
    /// The output respects the formatter's width, fill, alignment and
    /// precision, so `format!("{:<10}", fmt)` pads as it would for a `&str`.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let name = match self {
            Format::Markdown => "markdown",
            Format::MySQL => "mysql",
            Format::PostgreSQL => "postgresql",
            Format::CSV => "csv",
            Format::TSV => "tsv",
        };
        // `pad` applies the caller's formatting flags; `write!(f, "{}", ..)`
        // would start from a fresh default spec and drop them.
        f.pad(name)
    }
}
340
341/// Trait for parsing table data from various input formats.
342///
343/// Implement this trait to add support for new table formats.
344/// All parsers should validate column consistency by using [`Table::new_validated`].
345///
346/// # Examples
347///
348/// ```
349/// use table_extractor::{Parser, Table, error::Result};
350///
351/// struct CustomParser;
352///
353/// impl Parser for CustomParser {
354///     fn parse(&self, input: &str) -> Result<Table> {
355///         // Parse input into headers and rows
356///         let headers = vec!["col1".to_string(), "col2".to_string()];
357///         let rows = vec![vec!["val1".to_string(), "val2".to_string()]];
358///
359///         // Use new_validated to ensure data integrity
360///         Table::new_validated(headers, rows)
361///     }
362/// }
363/// ```
364pub trait Parser {
365    /// Parses the input string into a table.
366    ///
367    /// # Errors
368    ///
369    /// Returns an error if the input cannot be parsed or if the resulting
370    /// table fails validation (inconsistent columns, too many columns, etc.).
371    fn parse(&self, input: &str) -> Result<Table>;
372}
373
374/// Trait for writing table data to various output formats.
375///
376/// Implement this trait to add support for new output formats.
377///
378/// # Examples
379///
380/// ```
381/// use table_extractor::{Writer, Table, error::Result};
382/// use std::io::Write as IoWrite;
383///
384/// struct CustomWriter;
385///
386/// impl Writer for CustomWriter {
387///     fn write(&self, table: &Table, output: &mut dyn IoWrite) -> Result<()> {
388///         // Write headers
389///         writeln!(output, "{}", table.headers().join(","))?;
390///
391///         // Write rows
392///         for row in table.rows() {
393///             writeln!(output, "{}", row.join(","))?;
394///         }
395///
396///         Ok(())
397///     }
398/// }
399/// ```
400pub trait Writer {
401    /// Writes the table to the provided output stream.
402    ///
403    /// # Errors
404    ///
405    /// Returns an error if writing fails or if the table data is invalid
406    /// for the output format (e.g., delimiter conflicts).
407    fn write(&self, table: &Table, output: &mut dyn Write) -> Result<()>;
408}
409
#[cfg(test)]
mod tests {
    use super::*;

    /// Turns string literals into the owned cells a `Table` stores.
    fn owned(cells: &[&str]) -> Vec<String> {
        cells.iter().map(|cell| cell.to_string()).collect()
    }

    /// The two-column header row shared by most tests.
    fn id_name_headers() -> Vec<String> {
        owned(&["id", "name"])
    }

    /// Two complete rows matching `id_name_headers`.
    fn complete_rows() -> Vec<Vec<String>> {
        vec![owned(&["1", "Alice"]), owned(&["2", "Bob"])]
    }

    /// Like `complete_rows`, but the second row is missing a cell.
    fn ragged_rows() -> Vec<Vec<String>> {
        vec![owned(&["1", "Alice"]), owned(&["2"])]
    }

    /// Produces `count` headers named `col0`, `col1`, ...
    fn numbered_headers(count: usize) -> Vec<String> {
        (0..count).map(|i| format!("col{}", i)).collect()
    }

    #[test]
    fn test_validate_consistent_columns() {
        let table = Table::new(id_name_headers(), complete_rows());
        assert!(table.validate().is_ok());
    }

    #[test]
    fn test_validate_inconsistent_columns() {
        let table = Table::new(id_name_headers(), ragged_rows());
        let outcome = table.validate();
        assert!(outcome.is_err());

        match outcome {
            Err(error::Error::InconsistentColumns {
                row,
                expected,
                found,
            }) => {
                assert_eq!(row, 2);
                assert_eq!(expected, 2);
                assert_eq!(found, 1);
            }
            _ => panic!("Expected InconsistentColumns error"),
        }
    }

    #[test]
    fn test_new_validated_success() {
        let outcome = Table::new_validated(id_name_headers(), complete_rows());
        assert!(outcome.is_ok());
    }

    #[test]
    fn test_new_validated_fails_on_inconsistent_columns() {
        let outcome = Table::new_validated(id_name_headers(), ragged_rows());
        assert!(outcome.is_err());
    }

    #[test]
    fn test_validate_empty_table() {
        let table = Table::new(Vec::new(), Vec::new());
        assert!(table.validate().is_ok());
    }

    #[test]
    fn test_validate_no_rows() {
        let table = Table::new(id_name_headers(), Vec::new());
        assert!(table.validate().is_ok());
    }

    #[test]
    fn test_new_validated_rejects_too_many_columns() {
        let outcome = Table::new_validated(numbered_headers(10001), Vec::new());
        assert!(outcome.is_err());

        match outcome {
            Err(error::Error::InvalidFormat(message)) => {
                assert!(message.contains("Too many columns"));
                assert!(message.contains("10001"));
                assert!(message.contains("10000"));
            }
            _ => panic!("Expected InvalidFormat error"),
        }
    }

    #[test]
    fn test_new_validated_accepts_max_columns() {
        let outcome = Table::new_validated(numbered_headers(10000), Vec::new());
        assert!(outcome.is_ok());
    }

    #[test]
    fn test_new_validated_accepts_just_under_max() {
        let outcome = Table::new_validated(numbered_headers(9999), Vec::new());
        assert!(outcome.is_ok());
    }

    #[test]
    fn test_format_display() {
        // Every variant paired with its canonical Display output.
        let expectations = [
            (Format::Markdown, "markdown"),
            (Format::MySQL, "mysql"),
            (Format::PostgreSQL, "postgresql"),
            (Format::CSV, "csv"),
            (Format::TSV, "tsv"),
        ];

        for (format, canonical) in expectations {
            assert_eq!(format.to_string(), canonical);
        }
    }

    #[test]
    fn test_format_display_roundtrip() {
        // Display output must parse back to the variant that produced it.
        for format in [
            Format::Markdown,
            Format::MySQL,
            Format::PostgreSQL,
            Format::CSV,
            Format::TSV,
        ] {
            let string = format.to_string();
            let parsed = string.parse::<Format>().unwrap();
            assert_eq!(format, parsed, "Round-trip failed for {}", string);
        }
    }
}