// table_extractor/lib.rs
1pub mod detector;
2pub mod error;
3pub mod parser;
4pub mod writer;
5
6use error::Result;
7use std::io::Write;
8use std::str::FromStr;
9
/// Maximum number of columns allowed in a table.
///
/// Prevents out-of-memory attacks via excessively wide tables.
/// [`Table::new_validated`] rejects any header list longer than this.
const MAX_COLUMNS: usize = 10_000;
13
/// Represents a parsed table with headers and data rows.
///
/// All rows are expected to have the same number of columns as the header.
/// [`Table::new`] does not check this; use [`Table::new_validated`] to create
/// a table with automatic validation, or call [`Table::validate`] afterwards.
///
/// # Examples
///
/// ```
/// use table_extractor::Table;
///
/// let table = Table::new(
///     vec!["id".to_string(), "name".to_string()],
///     vec![
///         vec!["1".to_string(), "Alice".to_string()],
///         vec!["2".to_string(), "Bob".to_string()],
///     ],
/// );
///
/// assert_eq!(table.column_count(), 2);
/// assert!(!table.is_empty());
/// ```
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Table {
    /// Column headers.
    headers: Vec<String>,

    /// Data rows, where each row should have the same length as `headers`.
    rows: Vec<Vec<String>>,
}
43
44impl Table {
45 /// Creates a new table without validation.
46 ///
47 /// For safer construction with automatic validation, use [`Table::new_validated`].
48 ///
49 /// # Examples
50 ///
51 /// ```
52 /// use table_extractor::Table;
53 ///
54 /// let table = Table::new(
55 /// vec!["id".to_string(), "name".to_string()],
56 /// vec![vec!["1".to_string(), "Alice".to_string()]],
57 /// );
58 /// ```
59 pub fn new(headers: Vec<String>, rows: Vec<Vec<String>>) -> Self {
60 Self { headers, rows }
61 }
62
63 /// Validates that all rows have the same number of columns as headers.
64 ///
65 /// # Errors
66 ///
67 /// Returns [`error::Error::InconsistentColumns`] if any row has a different
68 /// column count than the header.
69 ///
70 /// # Examples
71 ///
72 /// ```
73 /// use table_extractor::Table;
74 ///
75 /// let table = Table::new(
76 /// vec!["id".to_string(), "name".to_string()],
77 /// vec![
78 /// vec!["1".to_string(), "Alice".to_string()],
79 /// vec!["2".to_string(), "Bob".to_string()],
80 /// ],
81 /// );
82 ///
83 /// assert!(table.validate().is_ok());
84 ///
85 /// // Table with inconsistent columns
86 /// let bad_table = Table::new(
87 /// vec!["id".to_string(), "name".to_string()],
88 /// vec![vec!["1".to_string()]], // Missing column!
89 /// );
90 ///
91 /// assert!(bad_table.validate().is_err());
92 /// ```
93 pub fn validate(&self) -> Result<()> {
94 let expected = self.headers.len();
95 for (idx, row) in self.rows.iter().enumerate() {
96 if row.len() != expected {
97 return Err(error::Error::InconsistentColumns {
98 row: idx + 1,
99 expected,
100 found: row.len(),
101 });
102 }
103 }
104 Ok(())
105 }
106
107 /// Creates a new table and validates it.
108 ///
109 /// This is the recommended way to create a table as it ensures data integrity
110 /// by validating column counts and enforcing limits.
111 ///
112 /// # Errors
113 ///
114 /// Returns an error if:
115 /// - The number of columns exceeds 10,000 ([`error::Error::InvalidFormat`])
116 /// - Any row has a different column count than the header ([`error::Error::InconsistentColumns`])
117 ///
118 /// # Examples
119 ///
120 /// ```
121 /// use table_extractor::Table;
122 ///
123 /// // Valid table
124 /// let table = Table::new_validated(
125 /// vec!["id".to_string(), "name".to_string()],
126 /// vec![
127 /// vec!["1".to_string(), "Alice".to_string()],
128 /// vec!["2".to_string(), "Bob".to_string()],
129 /// ],
130 /// );
131 /// assert!(table.is_ok());
132 ///
133 /// // Invalid table (inconsistent columns)
134 /// let bad_table = Table::new_validated(
135 /// vec!["id".to_string(), "name".to_string()],
136 /// vec![vec!["1".to_string()]], // Missing column!
137 /// );
138 /// assert!(bad_table.is_err());
139 /// ```
140 pub fn new_validated(headers: Vec<String>, rows: Vec<Vec<String>>) -> Result<Self> {
141 // Check column count limit
142 if headers.len() > MAX_COLUMNS {
143 return Err(error::Error::InvalidFormat(format!(
144 "Too many columns: {} (maximum: {})",
145 headers.len(),
146 MAX_COLUMNS
147 )));
148 }
149
150 let table = Self { headers, rows };
151 table.validate()?;
152 Ok(table)
153 }
154
155 /// Returns `true` if the table contains no data rows.
156 ///
157 /// Note: A table with headers but no data rows is considered empty.
158 ///
159 /// # Examples
160 ///
161 /// ```
162 /// use table_extractor::Table;
163 ///
164 /// let empty_table = Table::new(
165 /// vec!["id".to_string(), "name".to_string()],
166 /// vec![],
167 /// );
168 /// assert!(empty_table.is_empty());
169 ///
170 /// let table_with_data = Table::new(
171 /// vec!["id".to_string(), "name".to_string()],
172 /// vec![vec!["1".to_string(), "Alice".to_string()]],
173 /// );
174 /// assert!(!table_with_data.is_empty());
175 /// ```
176 pub fn is_empty(&self) -> bool {
177 self.rows.is_empty()
178 }
179
180 /// Returns the number of columns in the table.
181 ///
182 /// This is equivalent to the length of the headers vector.
183 ///
184 /// # Examples
185 ///
186 /// ```
187 /// use table_extractor::Table;
188 ///
189 /// let table = Table::new(
190 /// vec!["id".to_string(), "name".to_string(), "email".to_string()],
191 /// vec![],
192 /// );
193 /// assert_eq!(table.column_count(), 3);
194 /// ```
195 pub fn column_count(&self) -> usize {
196 self.headers.len()
197 }
198
199 /// Returns a reference to the table headers.
200 ///
201 /// # Examples
202 ///
203 /// ```
204 /// use table_extractor::Table;
205 ///
206 /// let table = Table::new(
207 /// vec!["id".to_string(), "name".to_string()],
208 /// vec![],
209 /// );
210 /// assert_eq!(table.headers(), &["id", "name"]);
211 /// ```
212 pub fn headers(&self) -> &[String] {
213 &self.headers
214 }
215
216 /// Returns a reference to the table rows.
217 ///
218 /// # Examples
219 ///
220 /// ```
221 /// use table_extractor::Table;
222 ///
223 /// let table = Table::new(
224 /// vec!["id".to_string(), "name".to_string()],
225 /// vec![
226 /// vec!["1".to_string(), "Alice".to_string()],
227 /// vec!["2".to_string(), "Bob".to_string()],
228 /// ],
229 /// );
230 /// assert_eq!(table.rows().len(), 2);
231 /// assert_eq!(table.rows()[0], vec!["1", "Alice"]);
232 /// ```
233 pub fn rows(&self) -> &[Vec<String>] {
234 &self.rows
235 }
236
237 /// Consumes the table and returns the headers and rows.
238 ///
239 /// This is useful when you need ownership of the table's data.
240 ///
241 /// # Examples
242 ///
243 /// ```
244 /// use table_extractor::Table;
245 ///
246 /// let table = Table::new(
247 /// vec!["id".to_string(), "name".to_string()],
248 /// vec![vec!["1".to_string(), "Alice".to_string()]],
249 /// );
250 ///
251 /// let (headers, rows) = table.into_parts();
252 /// assert_eq!(headers, vec!["id", "name"]);
253 /// assert_eq!(rows.len(), 1);
254 /// ```
255 pub fn into_parts(self) -> (Vec<String>, Vec<Vec<String>>) {
256 (self.headers, self.rows)
257 }
258}
259
/// Supported table formats for parsing and auto-detection.
///
/// This enum represents the various table formats that can be parsed by the library.
/// Formats can be auto-detected or explicitly specified.
///
/// # Examples
///
/// ```
/// use table_extractor::Format;
/// use std::str::FromStr;
///
/// // Parse format from string
/// let format = Format::from_str("markdown").unwrap();
/// assert_eq!(format, Format::Markdown);
///
/// // Case insensitive
/// let format = Format::from_str("MySQL").unwrap();
/// assert_eq!(format, Format::MySQL);
///
/// // Aliases are supported
/// let format = Format::from_str("psql").unwrap();
/// assert_eq!(format, Format::PostgreSQL);
///
/// // Display trait converts back to canonical string
/// assert_eq!(format.to_string(), "postgresql");
/// assert_eq!(Format::CSV.to_string(), "csv");
///
/// // Round-trip conversion works
/// let original = Format::Markdown;
/// let parsed = Format::from_str(&original.to_string()).unwrap();
/// assert_eq!(original, parsed);
/// ```
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Format {
    /// Markdown table format with pipe delimiters (e.g., `| col1 | col2 |`)
    Markdown,

    /// MySQL CLI output format with box-drawing characters (e.g., `+----+----+`)
    MySQL,

    /// PostgreSQL CLI output format with simple separators (e.g., `----+----`)
    PostgreSQL,

    /// Comma-separated values (CSV) format
    CSV,

    /// Tab-separated values (TSV) format
    TSV,
}

/// Parses a format name, ignoring ASCII case.
///
/// Accepted names: `markdown`/`md`, `mysql`, `postgres`/`postgresql`/`psql`,
/// `csv`, `tsv`. Anything else yields an `Err` whose message echoes the
/// input exactly as given.
impl FromStr for Format {
    type Err = String;

    fn from_str(s: &str) -> std::result::Result<Self, Self::Err> {
        // Fold ASCII only: every accepted name is plain ASCII, and Unicode
        // lowercasing would let look-alikes such as U+212A KELVIN SIGN
        // (which lowercases to 'k') match "markdown".
        match s.to_ascii_lowercase().as_str() {
            "markdown" | "md" => Ok(Format::Markdown),
            "mysql" => Ok(Format::MySQL),
            "postgres" | "postgresql" | "psql" => Ok(Format::PostgreSQL),
            "csv" => Ok(Format::CSV),
            "tsv" => Ok(Format::TSV),
            _ => Err(format!(
                "Invalid format: '{}'. Valid formats: markdown, mysql, postgres, csv, tsv",
                s
            )),
        }
    }
}

/// Writes the canonical lowercase name of the format.
///
/// Every name written here is accepted by the `FromStr` implementation, so
/// `to_string()` followed by `from_str` round-trips.
impl std::fmt::Display for Format {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let name = match self {
            Format::Markdown => "markdown",
            Format::MySQL => "mysql",
            Format::PostgreSQL => "postgresql",
            Format::CSV => "csv",
            Format::TSV => "tsv",
        };
        // `pad` honours width/alignment flags such as `{:<12}`; a nested
        // `write!(f, "{}", name)` would silently drop them.
        f.pad(name)
    }
}
340
341/// Trait for parsing table data from various input formats.
342///
343/// Implement this trait to add support for new table formats.
344/// All parsers should validate column consistency by using [`Table::new_validated`].
345///
346/// # Examples
347///
348/// ```
349/// use table_extractor::{Parser, Table, error::Result};
350///
351/// struct CustomParser;
352///
353/// impl Parser for CustomParser {
354/// fn parse(&self, input: &str) -> Result<Table> {
355/// // Parse input into headers and rows
356/// let headers = vec!["col1".to_string(), "col2".to_string()];
357/// let rows = vec![vec!["val1".to_string(), "val2".to_string()]];
358///
359/// // Use new_validated to ensure data integrity
360/// Table::new_validated(headers, rows)
361/// }
362/// }
363/// ```
364pub trait Parser {
365 /// Parses the input string into a table.
366 ///
367 /// # Errors
368 ///
369 /// Returns an error if the input cannot be parsed or if the resulting
370 /// table fails validation (inconsistent columns, too many columns, etc.).
371 fn parse(&self, input: &str) -> Result<Table>;
372}
373
374/// Trait for writing table data to various output formats.
375///
376/// Implement this trait to add support for new output formats.
377///
378/// # Examples
379///
380/// ```
381/// use table_extractor::{Writer, Table, error::Result};
382/// use std::io::Write as IoWrite;
383///
384/// struct CustomWriter;
385///
386/// impl Writer for CustomWriter {
387/// fn write(&self, table: &Table, output: &mut dyn IoWrite) -> Result<()> {
388/// // Write headers
389/// writeln!(output, "{}", table.headers().join(","))?;
390///
391/// // Write rows
392/// for row in table.rows() {
393/// writeln!(output, "{}", row.join(","))?;
394/// }
395///
396/// Ok(())
397/// }
398/// }
399/// ```
400pub trait Writer {
401 /// Writes the table to the provided output stream.
402 ///
403 /// # Errors
404 ///
405 /// Returns an error if writing fails or if the table data is invalid
406 /// for the output format (e.g., delimiter conflicts).
407 fn write(&self, table: &Table, output: &mut dyn Write) -> Result<()>;
408}
409
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds an owned header list or data row from string literals.
    fn strings(cells: &[&str]) -> Vec<String> {
        cells.iter().map(|cell| cell.to_string()).collect()
    }

    /// Builds `count` distinct header names: `col0`, `col1`, ...
    fn numbered_headers(count: usize) -> Vec<String> {
        (0..count).map(|i| format!("col{}", i)).collect()
    }

    #[test]
    fn test_validate_consistent_columns() {
        let table = Table::new(
            strings(&["id", "name"]),
            vec![strings(&["1", "Alice"]), strings(&["2", "Bob"])],
        );
        assert!(table.validate().is_ok());
    }

    #[test]
    fn test_validate_inconsistent_columns() {
        let table = Table::new(
            strings(&["id", "name"]),
            vec![
                strings(&["1", "Alice"]),
                strings(&["2"]), // Missing column
            ],
        );
        let result = table.validate();
        assert!(result.is_err());
        if let Err(error::Error::InconsistentColumns {
            row,
            expected,
            found,
        }) = result
        {
            // The second data row is reported with a 1-based index.
            assert_eq!(row, 2);
            assert_eq!(expected, 2);
            assert_eq!(found, 1);
        } else {
            panic!("Expected InconsistentColumns error");
        }
    }

    #[test]
    fn test_new_validated_success() {
        let result = Table::new_validated(
            strings(&["id", "name"]),
            vec![strings(&["1", "Alice"]), strings(&["2", "Bob"])],
        );
        assert!(result.is_ok());
    }

    #[test]
    fn test_new_validated_fails_on_inconsistent_columns() {
        let result = Table::new_validated(
            strings(&["id", "name"]),
            vec![
                strings(&["1", "Alice"]),
                strings(&["2"]), // Missing column
            ],
        );
        assert!(result.is_err());
    }

    #[test]
    fn test_validate_empty_table() {
        let table = Table::new(vec![], vec![]);
        assert!(table.validate().is_ok());
    }

    #[test]
    fn test_validate_no_rows() {
        let table = Table::new(strings(&["id", "name"]), vec![]);
        assert!(table.validate().is_ok());
    }

    #[test]
    fn test_new_validated_rejects_too_many_columns() {
        // One column over the limit; sizes follow MAX_COLUMNS so the test
        // stays correct if the limit is ever changed.
        let too_many = MAX_COLUMNS + 1;
        let result = Table::new_validated(numbered_headers(too_many), vec![]);
        assert!(result.is_err());
        if let Err(error::Error::InvalidFormat(msg)) = result {
            assert!(msg.contains("Too many columns"));
            assert!(msg.contains(&too_many.to_string()));
            assert!(msg.contains(&MAX_COLUMNS.to_string()));
        } else {
            panic!("Expected InvalidFormat error");
        }
    }

    #[test]
    fn test_new_validated_accepts_max_columns() {
        let result = Table::new_validated(numbered_headers(MAX_COLUMNS), vec![]);
        assert!(result.is_ok());
    }

    #[test]
    fn test_new_validated_accepts_just_under_max() {
        let result = Table::new_validated(numbered_headers(MAX_COLUMNS - 1), vec![]);
        assert!(result.is_ok());
    }

    #[test]
    fn test_format_display() {
        // Test Display trait for all Format variants
        assert_eq!(Format::Markdown.to_string(), "markdown");
        assert_eq!(Format::MySQL.to_string(), "mysql");
        assert_eq!(Format::PostgreSQL.to_string(), "postgresql");
        assert_eq!(Format::CSV.to_string(), "csv");
        assert_eq!(Format::TSV.to_string(), "tsv");
    }

    #[test]
    fn test_format_display_roundtrip() {
        use std::str::FromStr;

        // Test that Display output can be parsed back to the same Format
        let formats = [
            Format::Markdown,
            Format::MySQL,
            Format::PostgreSQL,
            Format::CSV,
            Format::TSV,
        ];

        for format in formats.iter().copied() {
            let string = format.to_string();
            let parsed = Format::from_str(&string).unwrap();
            assert_eq!(format, parsed, "Round-trip failed for {}", string);
        }
    }
}