table_extractor/lib.rs
1pub mod detector;
2pub mod error;
3pub mod parser;
4pub mod writer;
5
6use error::Result;
7use std::io::Write;
8use std::str::FromStr;
9
/// Upper bound on how many columns a single table may declare.
///
/// Guards against out-of-memory attacks that rely on excessively wide tables.
const MAX_COLUMNS: usize = 10_000;
13
/// A parsed table: one list of column headers plus zero or more data rows.
///
/// Every data row is expected to hold exactly as many cells as there are
/// headers. Both fields are public and [`Table::new`] performs no checks, so
/// that invariant is only established for tables built with
/// [`Table::new_validated`] or checked explicitly with [`Table::validate`].
///
/// `Table::default()` is the empty table (no headers and no rows).
///
/// # Examples
///
/// ```
/// use table_extractor::Table;
///
/// let table = Table::new(
///     vec!["id".to_string(), "name".to_string()],
///     vec![
///         vec!["1".to_string(), "Alice".to_string()],
///         vec!["2".to_string(), "Bob".to_string()],
///     ],
/// );
///
/// assert_eq!(table.column_count(), 2);
/// assert!(!table.is_empty());
///
/// // The default value is a table with no headers and no rows.
/// assert_eq!(Table::default(), Table::new(vec![], vec![]));
/// ```
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct Table {
    /// Column headers.
    pub headers: Vec<String>,

    /// Data rows, where each row should have the same length as `headers`.
    pub rows: Vec<Vec<String>>,
}
43
44impl Table {
45 /// Creates a new table without validation.
46 ///
47 /// For safer construction with automatic validation, use [`Table::new_validated`].
48 ///
49 /// # Examples
50 ///
51 /// ```
52 /// use table_extractor::Table;
53 ///
54 /// let table = Table::new(
55 /// vec!["id".to_string(), "name".to_string()],
56 /// vec![vec!["1".to_string(), "Alice".to_string()]],
57 /// );
58 /// ```
59 pub fn new(headers: Vec<String>, rows: Vec<Vec<String>>) -> Self {
60 Self { headers, rows }
61 }
62
63 /// Validates that all rows have the same number of columns as headers.
64 ///
65 /// # Errors
66 ///
67 /// Returns [`error::Error::InconsistentColumns`] if any row has a different
68 /// column count than the header.
69 ///
70 /// # Examples
71 ///
72 /// ```
73 /// use table_extractor::Table;
74 ///
75 /// let table = Table::new(
76 /// vec!["id".to_string(), "name".to_string()],
77 /// vec![
78 /// vec!["1".to_string(), "Alice".to_string()],
79 /// vec!["2".to_string(), "Bob".to_string()],
80 /// ],
81 /// );
82 ///
83 /// assert!(table.validate().is_ok());
84 ///
85 /// // Table with inconsistent columns
86 /// let bad_table = Table::new(
87 /// vec!["id".to_string(), "name".to_string()],
88 /// vec![vec!["1".to_string()]], // Missing column!
89 /// );
90 ///
91 /// assert!(bad_table.validate().is_err());
92 /// ```
93 pub fn validate(&self) -> Result<()> {
94 let expected = self.headers.len();
95 for (idx, row) in self.rows.iter().enumerate() {
96 if row.len() != expected {
97 return Err(error::Error::InconsistentColumns {
98 row: idx + 1,
99 expected,
100 found: row.len(),
101 });
102 }
103 }
104 Ok(())
105 }
106
107 /// Creates a new table and validates it.
108 ///
109 /// This is the recommended way to create a table as it ensures data integrity
110 /// by validating column counts and enforcing limits.
111 ///
112 /// # Errors
113 ///
114 /// Returns an error if:
115 /// - The number of columns exceeds 10,000 ([`error::Error::InvalidFormat`])
116 /// - Any row has a different column count than the header ([`error::Error::InconsistentColumns`])
117 ///
118 /// # Examples
119 ///
120 /// ```
121 /// use table_extractor::Table;
122 ///
123 /// // Valid table
124 /// let table = Table::new_validated(
125 /// vec!["id".to_string(), "name".to_string()],
126 /// vec![
127 /// vec!["1".to_string(), "Alice".to_string()],
128 /// vec!["2".to_string(), "Bob".to_string()],
129 /// ],
130 /// );
131 /// assert!(table.is_ok());
132 ///
133 /// // Invalid table (inconsistent columns)
134 /// let bad_table = Table::new_validated(
135 /// vec!["id".to_string(), "name".to_string()],
136 /// vec![vec!["1".to_string()]], // Missing column!
137 /// );
138 /// assert!(bad_table.is_err());
139 /// ```
140 pub fn new_validated(headers: Vec<String>, rows: Vec<Vec<String>>) -> Result<Self> {
141 // Check column count limit
142 if headers.len() > MAX_COLUMNS {
143 return Err(error::Error::InvalidFormat(format!(
144 "Too many columns: {} (maximum: {})",
145 headers.len(),
146 MAX_COLUMNS
147 )));
148 }
149
150 let table = Self { headers, rows };
151 table.validate()?;
152 Ok(table)
153 }
154
155 /// Returns `true` if the table contains no data rows.
156 ///
157 /// Note: A table with headers but no data rows is considered empty.
158 ///
159 /// # Examples
160 ///
161 /// ```
162 /// use table_extractor::Table;
163 ///
164 /// let empty_table = Table::new(
165 /// vec!["id".to_string(), "name".to_string()],
166 /// vec![],
167 /// );
168 /// assert!(empty_table.is_empty());
169 ///
170 /// let table_with_data = Table::new(
171 /// vec!["id".to_string(), "name".to_string()],
172 /// vec![vec!["1".to_string(), "Alice".to_string()]],
173 /// );
174 /// assert!(!table_with_data.is_empty());
175 /// ```
176 pub fn is_empty(&self) -> bool {
177 self.rows.is_empty()
178 }
179
180 /// Returns the number of columns in the table.
181 ///
182 /// This is equivalent to the length of the headers vector.
183 ///
184 /// # Examples
185 ///
186 /// ```
187 /// use table_extractor::Table;
188 ///
189 /// let table = Table::new(
190 /// vec!["id".to_string(), "name".to_string(), "email".to_string()],
191 /// vec![],
192 /// );
193 /// assert_eq!(table.column_count(), 3);
194 /// ```
195 pub fn column_count(&self) -> usize {
196 self.headers.len()
197 }
198}
199
/// Supported table formats for parsing and auto-detection.
///
/// A format can be named explicitly by parsing a string (see the [`FromStr`]
/// implementation) and is rendered back to its canonical lowercase name by
/// the [`Display`](std::fmt::Display) implementation.
///
/// # Examples
///
/// ```
/// use table_extractor::Format;
/// use std::str::FromStr;
///
/// // Parse format from string
/// let format = Format::from_str("markdown").unwrap();
/// assert_eq!(format, Format::Markdown);
///
/// // Case insensitive
/// let format = Format::from_str("MySQL").unwrap();
/// assert_eq!(format, Format::MySQL);
///
/// // Aliases are supported
/// let format = Format::from_str("psql").unwrap();
/// assert_eq!(format, Format::PostgreSQL);
///
/// // Display converts back to the canonical string
/// assert_eq!(format.to_string(), "postgresql");
/// assert_eq!(Format::CSV.to_string(), "csv");
///
/// // Width and alignment in format strings are honored
/// assert_eq!(format!("{:>6}", Format::CSV), "   csv");
///
/// // Round-trip conversion works
/// let original = Format::Markdown;
/// let parsed = Format::from_str(&original.to_string()).unwrap();
/// assert_eq!(original, parsed);
/// ```
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum Format {
    /// Markdown table format with pipe delimiters (e.g., `| col1 | col2 |`)
    Markdown,

    /// MySQL CLI output format with box-drawing characters (e.g., `+----+----+`)
    MySQL,

    /// PostgreSQL CLI output format with simple separators (e.g., `----+----`)
    PostgreSQL,

    /// Comma-separated values (CSV) format
    CSV,

    /// Tab-separated values (TSV) format
    TSV,
}

impl FromStr for Format {
    type Err = String;

    /// Parses a format name, ignoring letter case.
    ///
    /// Accepted names: `markdown`/`md`, `mysql`,
    /// `postgres`/`postgresql`/`psql`, `csv`, `tsv`. The input is not
    /// trimmed, so surrounding whitespace makes the name invalid.
    ///
    /// # Errors
    ///
    /// Returns a message that quotes the original input and lists the valid
    /// format names when the input matches none of them.
    fn from_str(s: &str) -> std::result::Result<Self, Self::Err> {
        match s.to_lowercase().as_str() {
            "markdown" | "md" => Ok(Format::Markdown),
            "mysql" => Ok(Format::MySQL),
            "postgres" | "postgresql" | "psql" => Ok(Format::PostgreSQL),
            "csv" => Ok(Format::CSV),
            "tsv" => Ok(Format::TSV),
            _ => Err(format!(
                "Invalid format: '{}'. Valid formats: markdown, mysql, postgres, csv, tsv",
                s
            )),
        }
    }
}

impl std::fmt::Display for Format {
    /// Writes the canonical lowercase name of the format.
    ///
    /// The name is emitted through [`std::fmt::Formatter::pad`], so width,
    /// fill, alignment and precision given in the format string are applied
    /// the same way they are for a plain `&str`.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let name = match self {
            Format::Markdown => "markdown",
            Format::MySQL => "mysql",
            Format::PostgreSQL => "postgresql",
            Format::CSV => "csv",
            Format::TSV => "tsv",
        };
        // `write!(f, "{}", name)` would discard the caller's padding options.
        f.pad(name)
    }
}
280
281/// Trait for parsing table data from various input formats.
282///
283/// Implement this trait to add support for new table formats.
284/// All parsers should validate column consistency by using [`Table::new_validated`].
285///
286/// # Examples
287///
288/// ```
289/// use table_extractor::{Parser, Table, error::Result};
290///
291/// struct CustomParser;
292///
293/// impl Parser for CustomParser {
294/// fn parse(&self, input: &str) -> Result<Table> {
295/// // Parse input into headers and rows
296/// let headers = vec!["col1".to_string(), "col2".to_string()];
297/// let rows = vec![vec!["val1".to_string(), "val2".to_string()]];
298///
299/// // Use new_validated to ensure data integrity
300/// Table::new_validated(headers, rows)
301/// }
302/// }
303/// ```
304pub trait Parser {
305 /// Parses the input string into a table.
306 ///
307 /// # Errors
308 ///
309 /// Returns an error if the input cannot be parsed or if the resulting
310 /// table fails validation (inconsistent columns, too many columns, etc.).
311 fn parse(&self, input: &str) -> Result<Table>;
312}
313
314/// Trait for writing table data to various output formats.
315///
316/// Implement this trait to add support for new output formats.
317///
318/// # Examples
319///
320/// ```
321/// use table_extractor::{Writer, Table, error::Result};
322/// use std::io::Write as IoWrite;
323///
324/// struct CustomWriter;
325///
326/// impl Writer for CustomWriter {
327/// fn write(&self, table: &Table, output: &mut dyn IoWrite) -> Result<()> {
328/// // Write headers
329/// writeln!(output, "{}", table.headers.join(","))?;
330///
331/// // Write rows
332/// for row in &table.rows {
333/// writeln!(output, "{}", row.join(","))?;
334/// }
335///
336/// Ok(())
337/// }
338/// }
339/// ```
340pub trait Writer {
341 /// Writes the table to the provided output stream.
342 ///
343 /// # Errors
344 ///
345 /// Returns an error if writing fails or if the table data is invalid
346 /// for the output format (e.g., delimiter conflicts).
347 fn write(&self, table: &Table, output: &mut dyn Write) -> Result<()>;
348}
349
#[cfg(test)]
mod tests {
    use super::*;

    /// Turns a slice of string literals into the owned cells a `Table` stores.
    fn cells(values: &[&str]) -> Vec<String> {
        values.iter().map(|value| value.to_string()).collect()
    }

    /// The two-column header shared by most tests.
    fn id_name_headers() -> Vec<String> {
        cells(&["id", "name"])
    }

    /// Produces `count` synthetic headers named `col0`, `col1`, and so on.
    fn numbered_headers(count: usize) -> Vec<String> {
        (0..count).map(|i| format!("col{}", i)).collect()
    }

    #[test]
    fn test_validate_consistent_columns() {
        let rows = vec![cells(&["1", "Alice"]), cells(&["2", "Bob"])];
        let table = Table::new(id_name_headers(), rows);
        assert!(table.validate().is_ok());
    }

    #[test]
    fn test_validate_inconsistent_columns() {
        // The second row is one cell short.
        let rows = vec![cells(&["1", "Alice"]), cells(&["2"])];
        let table = Table::new(id_name_headers(), rows);

        let outcome = table.validate();
        assert!(outcome.is_err());
        match outcome {
            Err(error::Error::InconsistentColumns {
                row,
                expected,
                found,
            }) => {
                assert_eq!(row, 2);
                assert_eq!(expected, 2);
                assert_eq!(found, 1);
            }
            _ => panic!("Expected InconsistentColumns error"),
        }
    }

    #[test]
    fn test_new_validated_success() {
        let rows = vec![cells(&["1", "Alice"]), cells(&["2", "Bob"])];
        assert!(Table::new_validated(id_name_headers(), rows).is_ok());
    }

    #[test]
    fn test_new_validated_fails_on_inconsistent_columns() {
        // The second row is one cell short.
        let rows = vec![cells(&["1", "Alice"]), cells(&["2"])];
        assert!(Table::new_validated(id_name_headers(), rows).is_err());
    }

    #[test]
    fn test_validate_empty_table() {
        let table = Table::new(Vec::new(), Vec::new());
        assert!(table.validate().is_ok());
    }

    #[test]
    fn test_validate_no_rows() {
        let table = Table::new(id_name_headers(), Vec::new());
        assert!(table.validate().is_ok());
    }

    #[test]
    fn test_new_validated_rejects_too_many_columns() {
        // One header more than the limit.
        let outcome = Table::new_validated(numbered_headers(10001), Vec::new());
        assert!(outcome.is_err());
        match outcome {
            Err(error::Error::InvalidFormat(msg)) => {
                assert!(msg.contains("Too many columns"));
                assert!(msg.contains("10001"));
                assert!(msg.contains("10000"));
            }
            _ => panic!("Expected InvalidFormat error"),
        }
    }

    #[test]
    fn test_new_validated_accepts_max_columns() {
        // Exactly at the limit.
        let outcome = Table::new_validated(numbered_headers(10000), Vec::new());
        assert!(outcome.is_ok());
    }

    #[test]
    fn test_new_validated_accepts_just_under_max() {
        let outcome = Table::new_validated(numbered_headers(9999), Vec::new());
        assert!(outcome.is_ok());
    }

    #[test]
    fn test_format_display() {
        // Every variant paired with its canonical Display output.
        let expectations = [
            (Format::Markdown, "markdown"),
            (Format::MySQL, "mysql"),
            (Format::PostgreSQL, "postgresql"),
            (Format::CSV, "csv"),
            (Format::TSV, "tsv"),
        ];

        for (format, expected) in expectations.iter() {
            assert_eq!(format.to_string(), *expected);
        }
    }

    #[test]
    fn test_format_display_roundtrip() {
        // Display output must parse back to the variant it came from.
        let formats = [
            Format::Markdown,
            Format::MySQL,
            Format::PostgreSQL,
            Format::CSV,
            Format::TSV,
        ];

        for &format in formats.iter() {
            let string = format.to_string();
            let parsed = string.parse::<Format>().unwrap();
            assert_eq!(format, parsed, "Round-trip failed for {}", string);
        }
    }
}