tabiew 0.14.0

A lightweight TUI application to view and query tabular data files, such as CSV, TSV, and parquet.
use std::io::Read;

use polars::{
    frame::DataFrame,
    prelude::{AnyValue, Column},
};
use pulldown_cmark::{Event, Options, Parser, Tag, TagEnd};

use crate::{
    AppResult,
    args::Args,
    io::reader::{DataFrameReader, NamedFrames, ReaderSource},
    misc::stdin::stdin,
};

/// Reader that turns the tables found in Markdown input into data frames.
///
/// The type carries no state; all work happens in its [`DataFrameReader`]
/// implementation.
#[derive(Debug, Default)]
pub struct MarkdownToDataFrame;

impl MarkdownToDataFrame {
    pub fn from_args(_args: &Args) -> Self {
        Self
    }
}

impl DataFrameReader for MarkdownToDataFrame {
    /// Reads Markdown from `input` and returns one data frame per table found.
    ///
    /// The whole input (a file or standard input) is loaded into memory and
    /// parsed with table support enabled. Frames are named `markdown_table_N`,
    /// where `N` counts tables in document order starting at 1.
    ///
    /// # Errors
    ///
    /// Propagates any error from reading the input or from building a frame.
    fn read_to_data_frames(&self, input: ReaderSource) -> AppResult<NamedFrames> {
        let markdown = match &input {
            ReaderSource::File(path) => std::fs::read_to_string(path)?,
            ReaderSource::Stdin => {
                let mut buffer = String::new();
                stdin().read_to_string(&mut buffer)?;
                buffer
            }
        };

        let events = Parser::new_ext(&markdown, Options::ENABLE_TABLES);

        let mut frames = Vec::new();
        for (position, (headers, rows)) in collect_tables(events).into_iter().enumerate() {
            // Build the frame first so a failure aborts before anything else is done.
            let frame = build_data_frame(headers, rows)?;
            let name = format!("markdown_table_{}", position + 1);
            frames.push((name, frame));
        }

        Ok(frames.into_boxed_slice())
    }
}

/// Walks a Markdown event stream and gathers every table it contains.
///
/// Each table is returned as a `(headers, rows)` pair, in the order the tables
/// end in the stream. A cell's text is built from the text and inline-code
/// events seen inside it, with soft and hard breaks turned into a single
/// space, and is trimmed of surrounding whitespace. Other events are ignored.
fn collect_tables(parser: Parser) -> Vec<(Vec<String>, Vec<Vec<String>>)> {
    let mut finished: Vec<(Vec<String>, Vec<Vec<String>>)> = Vec::new();
    let mut header_cells: Vec<String> = Vec::new();
    let mut body_rows: Vec<Vec<String>> = Vec::new();
    let mut current_row: Vec<String> = Vec::new();
    // `Some` only while inside a table cell; holds that cell's text so far.
    let mut current_cell: Option<String> = None;
    let mut inside_header = false;

    for event in parser {
        match event {
            Event::Start(tag) => match tag {
                Tag::Table(_) => {
                    header_cells.clear();
                    body_rows.clear();
                }
                Tag::TableHead => inside_header = true,
                Tag::TableRow => current_row.clear(),
                Tag::TableCell => current_cell = Some(String::new()),
                _ => {}
            },
            Event::End(tag) => match tag {
                TagEnd::Table => {
                    let headers = std::mem::take(&mut header_cells);
                    let rows = std::mem::take(&mut body_rows);
                    finished.push((headers, rows));
                }
                TagEnd::TableHead => inside_header = false,
                TagEnd::TableRow => {
                    // A row end seen while still in the header is not a body row.
                    if !inside_header {
                        body_rows.push(std::mem::take(&mut current_row));
                    }
                }
                TagEnd::TableCell => {
                    let text = current_cell.take().unwrap_or_default().trim().to_owned();
                    let destination = if inside_header {
                        &mut header_cells
                    } else {
                        &mut current_row
                    };
                    destination.push(text);
                }
                _ => {}
            },
            Event::Text(fragment) | Event::Code(fragment) => {
                if let Some(buffer) = current_cell.as_mut() {
                    buffer.push_str(&fragment);
                }
            }
            Event::SoftBreak | Event::HardBreak => {
                if let Some(buffer) = current_cell.as_mut() {
                    buffer.push(' ');
                }
            }
            _ => {}
        }
    }

    finished
}

/// Assembles a data frame from a table's header names and string rows.
///
/// One column is created per header. Every cell is stored as an owned string
/// value. A row with fewer cells than headers is padded with nulls, and cells
/// beyond the header count are dropped.
///
/// # Errors
///
/// Returns the error raised by the data frame constructor, if any.
fn build_data_frame(headers: Vec<String>, rows: Vec<Vec<String>>) -> AppResult<DataFrame> {
    let width = headers.len();
    let mut cells_by_column: Vec<Vec<AnyValue<'static>>> =
        std::iter::repeat_with(Vec::new).take(width).collect();

    for record in rows {
        let mut cells = record
            .into_iter()
            .map(|text| AnyValue::StringOwned(text.into()));
        // Visit every column so that short rows still contribute a null.
        for bucket in &mut cells_by_column {
            bucket.push(cells.next().unwrap_or(AnyValue::Null));
        }
    }

    let frame = DataFrame::new_infer_height(
        headers
            .into_iter()
            .zip(cells_by_column)
            .map(|(title, cells)| Column::new(title.into(), cells))
            .collect(),
    )?;
    Ok(frame)
}