use clap::Parser;
use std::process;
mod cli;
mod selector;
use selector::SelectorError;
include!("utils.rs");
/// Per-selector progress record used while scanning a collection item by item.
///
/// Keeping this outside the selector lets the selector itself stay immutable
/// (see `item_in_sequence_with_state`).
#[derive(Debug, Clone, PartialEq)]
pub struct SelectionState {
    /// Index of the most recent item that opened the range, or `usize::MAX`
    /// while no start has been seen yet.
    pub current_start_idx: usize,
    /// Index of the most recent item that closed the range, or `usize::MAX`
    /// while no end has been seen yet.
    pub current_end_idx: usize,
    /// Set once a start item was found to also be the end of its range.
    pub stopped: bool,
}

impl Default for SelectionState {
    /// Builds the "nothing seen yet" state: both indices hold the
    /// `usize::MAX` sentinel and `stopped` is `false`.
    fn default() -> Self {
        let unset = usize::MAX;
        SelectionState {
            stopped: false,
            current_start_idx: unset,
            current_end_idx: unset,
        }
    }
}
/// Decides whether the item at `item_idx` is selected by `selector`, keeping
/// all scan progress in `state` so that `selector` itself is never modified.
///
/// The function is meant to be called once per item, in index order, with the
/// same `state` for a given selector.
///
/// # Parameters
/// * `item_idx` - position of the item inside the collection.
/// * `item` - text of the item, matched against the selector's regexes.
/// * `selector` - the selector to evaluate; a private clone is resolved
///   against `collection_length` on every call.
/// * `state` - progress record updated when a range start or end is seen.
/// * `collection_length` - total number of items, forwarded to
///   `resolve_indices`.
///
/// # Returns
/// `true` when the item belongs to the selection, `false` otherwise.
#[cfg_attr(test, allow(dead_code))]
pub fn item_in_sequence_with_state(
    item_idx: usize,
    item: &str,
    selector: &selector::Selector,
    state: &mut SelectionState,
    collection_length: usize,
) -> bool {
    // Resolve on a scratch copy; the caller's selector stays untouched.
    let mut resolved = selector.clone();
    resolved.resolve_indices(collection_length);

    // Start and end resolve to the same index and share one non-default
    // regex: away from that index the decision is a plain regex test and
    // `state` is left alone.
    let single_pattern_elsewhere = item_idx != resolved.resolved_start_idx
        && resolved.resolved_start_idx == resolved.resolved_end_idx
        && utils::regex_eq(&resolved.start_regex, &resolved.end_regex)
        && !utils::regex_is_default(&resolved.start_regex);
    if single_pattern_elsewhere {
        return resolved.start_regex.is_match(item);
    }

    // The item opens a range: either by index (when the start regex is the
    // default one) or because the start regex matches its text.
    let opens_range = (item_idx == resolved.resolved_start_idx
        && utils::regex_is_default(&resolved.start_regex))
        || resolved.start_regex.is_match(item);
    if opens_range {
        state.current_start_idx = item_idx;
        // The same item also closes the range when start and end are the
        // same non-default regex, or resolve to the same concrete index.
        let closes_immediately = (utils::regex_eq(&resolved.end_regex, &resolved.start_regex)
            && !utils::regex_is_default(&resolved.start_regex))
            || (resolved.resolved_end_idx == resolved.resolved_start_idx
                && resolved.resolved_start_idx != usize::MAX);
        if closes_immediately {
            state.stopped = true;
            state.current_end_idx = item_idx;
        }
        return true;
    }

    // Distance from the recorded start, tested against the selector's step.
    let on_step = item_idx
        .saturating_sub(state.current_start_idx)
        .is_multiple_of(resolved.step);

    // The item closes a range that has already been opened: by index (and on
    // a step boundary) or because the end regex matches its text.
    let closes_range = state.current_start_idx != usize::MAX
        && ((item_idx == resolved.resolved_end_idx
            && item_idx >= state.current_start_idx
            && on_step)
            || resolved.end_regex.is_match(item));
    if closes_range {
        state.current_end_idx = item_idx;
        return true;
    }

    // Otherwise the item is selected only when it lies strictly between the
    // recorded start and end and falls on a step boundary.
    item_idx > state.current_start_idx && item_idx < state.current_end_idx && on_step
}
/// Decides whether the item at `item_idx` is selected by `selector`, storing
/// scan progress directly inside the selector.
///
/// Unlike `item_in_sequence_with_state`, this variant mutates its argument:
/// `resolve_indices` is invoked on `selector` itself, and
/// `resolved_start_idx`, `resolved_end_idx` and `stopped` are overwritten as
/// range boundaries are encountered.
///
/// # Parameters
/// * `item_idx` - position of the item inside the collection.
/// * `item` - text of the item, matched against the selector's regexes.
/// * `selector` - the selector to evaluate and update in place.
/// * `collection_length` - total number of items, forwarded to
///   `resolve_indices`.
///
/// # Returns
/// `true` when the item belongs to the selection, `false` otherwise.
#[cfg_attr(test, allow(dead_code))]
pub fn item_in_sequence(
    item_idx: usize,
    item: &str,
    selector: &mut selector::Selector,
    collection_length: usize,
) -> bool {
    selector.resolve_indices(collection_length);

    // Start and end resolve to the same index and share one non-default
    // regex: away from that index the decision is a plain regex test.
    let single_pattern_elsewhere = item_idx != selector.resolved_start_idx
        && selector.resolved_start_idx == selector.resolved_end_idx
        && utils::regex_eq(&selector.start_regex, &selector.end_regex)
        && !utils::regex_is_default(&selector.start_regex);
    if single_pattern_elsewhere {
        return selector.start_regex.is_match(item);
    }

    // The item opens a range: either by index (when the start regex is the
    // default one) or because the start regex matches its text.
    let opens_range = (item_idx == selector.resolved_start_idx
        && utils::regex_is_default(&selector.start_regex))
        || selector.start_regex.is_match(item);
    if opens_range {
        // Record the start first: the end comparison below reads the
        // freshly written value.
        selector.resolved_start_idx = item_idx;
        let closes_immediately = (utils::regex_eq(&selector.end_regex, &selector.start_regex)
            && !utils::regex_is_default(&selector.start_regex))
            || (selector.resolved_end_idx == selector.resolved_start_idx);
        if closes_immediately {
            selector.stopped = true;
        }
        return true;
    }

    // Distance from the recorded start, tested against the selector's step.
    let on_step = item_idx
        .saturating_sub(selector.resolved_start_idx)
        .is_multiple_of(selector.step);

    // The item closes a range whose start index is known: by index (and on a
    // step boundary) or because the end regex matches its text.
    let closes_range = selector.resolved_start_idx != usize::MAX
        && ((item_idx == selector.resolved_end_idx
            && item_idx >= selector.resolved_start_idx
            && on_step)
            || selector.end_regex.is_match(item));
    if closes_range {
        selector.resolved_end_idx = item_idx;
        return true;
    }

    // Otherwise the item is selected only when it lies strictly between
    // start and end and falls on a step boundary.
    item_idx > selector.resolved_start_idx && item_idx < selector.resolved_end_idx && on_step
}
/// Returns the indices of the columns of `index_row` picked by
/// `column_selectors`, without modifying the selectors.
///
/// `index_row` is split on `column_delimiter`; every column is then offered
/// to every selector in order. An index is pushed once per selector that
/// accepts it, so the result may contain duplicates.
///
/// # Errors
/// Propagates any error returned by `utils::split`.
#[cfg_attr(test, allow(dead_code))]
pub fn get_columns_immutable(
    index_row: &str,
    column_selectors: &[selector::Selector],
    column_delimiter: &str,
) -> Result<Vec<usize>, SelectorError> {
    // Nothing to select: skip splitting altogether.
    if column_selectors.is_empty() {
        return Ok(Vec::new());
    }

    let columns = utils::split(index_row, column_delimiter)?;
    let column_count = columns.len();
    // One independent progress record per selector.
    let mut states = vec![SelectionState::default(); column_selectors.len()];
    let mut selected: Vec<usize> = Vec::new();

    for (col_idx, column) in columns.iter().enumerate() {
        let hits = column_selectors
            .iter()
            .zip(states.iter_mut())
            .filter_map(|(column_selector, state)| {
                item_in_sequence_with_state(col_idx, column, column_selector, state, column_count)
                    .then_some(col_idx)
            });
        selected.extend(hits);
    }

    Ok(selected)
}
/// Returns the indices of the columns of `index_row` picked by
/// `column_selectors`, letting each selector track its progress in place.
///
/// `index_row` is split on `column_delimiter`; every column is then offered
/// to every selector in order via `item_in_sequence`, which mutates the
/// selector. An index is pushed once per selector that accepts it, so the
/// result may contain duplicates.
///
/// # Errors
/// Propagates any error returned by `utils::split`.
#[cfg_attr(test, allow(dead_code))]
pub fn get_columns(
    index_row: &str,
    column_selectors: &mut [selector::Selector],
    column_delimiter: &str,
) -> Result<Vec<usize>, SelectorError> {
    // Nothing to select: skip splitting altogether.
    if column_selectors.is_empty() {
        return Ok(Vec::new());
    }

    let columns = utils::split(index_row, column_delimiter)?;
    let column_count = columns.len();
    let mut selected: Vec<usize> = Vec::new();

    for (col_idx, column) in columns.iter().enumerate() {
        let hits = column_selectors.iter_mut().filter_map(|column_selector| {
            item_in_sequence(col_idx, column, column_selector, column_count).then_some(col_idx)
        });
        selected.extend(hits);
    }

    Ok(selected)
}
/// Like `get_columns_immutable`, but additionally reports which selectors
/// never matched any column.
///
/// # Parameters
/// * `index_row` - the row whose columns are inspected.
/// * `column_selectors` - parsed selectors, evaluated without mutation.
/// * `column_delimiter` - delimiter handed to `utils::split`.
/// * `original_selectors_str` - the raw selector text; it is split on `,`
///   and the n-th piece is used as the display name of the n-th selector.
///
/// # Returns
/// A pair of (selected column indices, trimmed text of every selector that
/// matched nothing). Selectors with no corresponding comma-separated piece
/// are silently left out of the second list.
///
/// # Errors
/// Propagates any error returned by `utils::split`.
#[cfg_attr(test, allow(dead_code))]
pub fn get_columns_with_match_info_immutable(
    index_row: &str,
    column_selectors: &[selector::Selector],
    column_delimiter: &str,
    original_selectors_str: &str,
) -> Result<(Vec<usize>, Vec<String>), SelectorError> {
    if column_selectors.is_empty() {
        return Ok((Vec::new(), Vec::new()));
    }

    let mut selected: Vec<usize> = Vec::new();
    let mut matched = vec![false; column_selectors.len()];
    let columns = utils::split(index_row, column_delimiter)?;
    let column_count = columns.len();
    // One independent progress record per selector.
    let mut states = vec![SelectionState::default(); column_selectors.len()];

    for (col_idx, column) in columns.iter().enumerate() {
        let per_selector = column_selectors
            .iter()
            .zip(states.iter_mut())
            .zip(matched.iter_mut());
        for ((column_selector, state), hit) in per_selector {
            if item_in_sequence_with_state(col_idx, column, column_selector, state, column_count) {
                selected.push(col_idx);
                *hit = true;
            }
        }
    }

    // Pair each raw selector piece with its match flag; `zip` stops at the
    // shorter side, so flags without a textual piece are skipped.
    let unmatched: Vec<String> = original_selectors_str
        .split(',')
        .zip(&matched)
        .filter_map(|(piece, &hit)| (!hit).then(|| piece.trim().to_string()))
        .collect();

    Ok((selected, unmatched))
}
/// Like `get_columns`, but additionally reports which selectors never
/// matched any column.
///
/// # Parameters
/// * `index_row` - the row whose columns are inspected.
/// * `column_selectors` - parsed selectors; each one is updated in place by
///   `item_in_sequence`.
/// * `column_delimiter` - delimiter handed to `utils::split`.
/// * `original_selectors_str` - the raw selector text; it is split on `,`
///   and the n-th piece is used as the display name of the n-th selector.
///
/// # Returns
/// A pair of (selected column indices, trimmed text of every selector that
/// matched nothing). Selectors with no corresponding comma-separated piece
/// are silently left out of the second list.
///
/// # Errors
/// Propagates any error returned by `utils::split`.
#[cfg_attr(test, allow(dead_code))]
pub fn get_columns_with_match_info(
    index_row: &str,
    column_selectors: &mut [selector::Selector],
    column_delimiter: &str,
    original_selectors_str: &str,
) -> Result<(Vec<usize>, Vec<String>), SelectorError> {
    if column_selectors.is_empty() {
        return Ok((Vec::new(), Vec::new()));
    }

    let mut selected: Vec<usize> = Vec::new();
    let mut matched = vec![false; column_selectors.len()];
    let columns = utils::split(index_row, column_delimiter)?;
    let column_count = columns.len();

    for (col_idx, column) in columns.iter().enumerate() {
        for (column_selector, hit) in column_selectors.iter_mut().zip(matched.iter_mut()) {
            if item_in_sequence(col_idx, column, column_selector, column_count) {
                selected.push(col_idx);
                *hit = true;
            }
        }
    }

    // Pair each raw selector piece with its match flag; `zip` stops at the
    // shorter side, so flags without a textual piece are skipped.
    let unmatched: Vec<String> = original_selectors_str
        .split(',')
        .zip(&matched)
        .filter_map(|(piece, &hit)| (!hit).then(|| piece.trim().to_string()))
        .collect();

    Ok((selected, unmatched))
}
/// Extracts the requested cells from a single row.
///
/// # Parameters
/// * `row` - the raw row text.
/// * `cells_to_select` - indices of the cells to keep.
/// * `column_delimiter` - delimiter handed to `utils::split`.
/// * `select_full_row` - what to do when `cells_to_select` is empty: return
///   the whole row as one cell (`true`) or nothing at all (`false`).
///
/// # Returns
/// The kept cells in their original left-to-right order. A cell appears at
/// most once even if its index is listed several times.
///
/// # Errors
/// Propagates any error returned by `utils::split`.
#[cfg_attr(test, allow(dead_code))]
pub fn get_cells(
    row: &str,
    cells_to_select: &[usize],
    column_delimiter: &str,
    select_full_row: bool,
) -> Result<Vec<String>, SelectorError> {
    // No explicit column list: the row is either passed through whole or
    // dropped, and is never split.
    if cells_to_select.is_empty() {
        let passthrough = if select_full_row {
            vec![row.to_string()]
        } else {
            Vec::new()
        };
        return Ok(passthrough);
    }

    let cells = utils::split(row, column_delimiter)?;
    let picked: Vec<String> = cells
        .iter()
        .enumerate()
        .filter(|(cell_idx, _)| cells_to_select.contains(cell_idx))
        .map(|(_, cell)| cell.clone())
        .collect();
    Ok(picked)
}
/// Renders rows of cells as aligned text lines.
///
/// Every cell except the last one of its row is left-aligned and padded with
/// spaces to the width of the widest cell found in that column position,
/// followed by a single separating space. The last cell of a row is emitted
/// verbatim, so lines carry no trailing padding.
///
/// Widths are measured in `char`s rather than bytes. The `{:width$}`
/// formatter pads by `char` count, so measuring in bytes would make any
/// column containing multi-byte UTF-8 text wider than its longest cell.
/// (Characters that occupy two terminal cells or zero cells are still
/// counted as one; handling those would need a Unicode-width table.)
///
/// # Parameters
/// * `output` - the rows to format; rows may have different lengths.
///
/// # Returns
/// One formatted `String` per input row, in the same order. An empty input
/// yields an empty vector and an empty row yields an empty string.
#[cfg_attr(test, allow(dead_code))]
pub fn format_columns(output: &[Vec<String>]) -> Vec<String> {
    // Widest cell (in chars) seen at each column position across all rows.
    let mut col_widths: Vec<usize> = Vec::new();
    for row in output {
        if row.len() > col_widths.len() {
            col_widths.resize(row.len(), 0);
        }
        for (width, cell) in col_widths.iter_mut().zip(row) {
            *width = (*width).max(cell.chars().count());
        }
    }

    output
        .iter()
        .map(|row| {
            let last_idx = row.len().saturating_sub(1);
            let mut line = String::new();
            for (col_idx, cell) in row.iter().enumerate() {
                if col_idx == last_idx {
                    // Final cell: no padding, so lines have no trailing spaces.
                    line.push_str(cell);
                } else {
                    line.push_str(&format!("{:width$} ", cell, width = col_widths[col_idx]));
                }
            }
            line
        })
        .collect()
}
fn main() {
let args = cli::Args::parse();
let input = cli::parse_input(&args.input);
let select_full_row = args.columns.is_empty();
let row_selectors = match selector::parse_selectors(&args.rows) {
Ok(selectors) => selectors,
Err(e) => {
eprintln!("Error parsing row selectors: {}", e);
process::exit(1);
}
};
let column_selectors = match selector::parse_selectors(&args.columns) {
Ok(selectors) => selectors,
Err(e) => {
eprintln!("Error parsing column selectors: {}", e);
process::exit(1);
}
};
let split_rows = match utils::split(&input, &args.row_delimiter) {
Ok(rows) => rows,
Err(e) => {
eprintln!("Error: {}", e);
process::exit(1);
}
};
let mut export_cols: Vec<usize> = Vec::new();
let mut output: Vec<Vec<String>> = Vec::new();
let mut row_states: Vec<SelectionState> = vec![SelectionState::default(); row_selectors.len()];
for (row_idx, row) in split_rows.iter().enumerate() {
if row_idx == 0 {
let (cols, unmatched) = match get_columns_with_match_info_immutable(
row,
&column_selectors,
&args.column_delimiter,
&args.columns,
) {
Ok((cols, unmatched)) => (cols, unmatched),
Err(e) => {
eprintln!("Error: {}", e);
process::exit(1);
}
};
export_cols = cols;
if !select_full_row {
if export_cols.is_empty() {
eprintln!("Warning: No valid columns found for selection");
} else if !unmatched.is_empty() {
eprintln!(
"Warning: Column selectors did not match any columns: {}",
unmatched.join(", ")
);
}
}
}
for (selector_idx, row_selector) in row_selectors.iter().enumerate() {
if item_in_sequence_with_state(
row_idx,
row,
row_selector,
&mut row_states[selector_idx],
split_rows.len(),
) {
let cells =
match get_cells(row, &export_cols, &args.column_delimiter, select_full_row) {
Ok(cells) => cells,
Err(e) => {
eprintln!("Error: {}", e);
process::exit(1);
}
};
output.push(cells);
}
}
}
let formatted_output = format_columns(&output);
for line in formatted_output {
println!("{}", line);
}
}
#[cfg(test)]
#[path = "utils_tests.rs"]
mod utils_tests;
#[cfg(test)]
#[path = "main_tests.rs"]
mod main_tests;