fip 1.0.2

A Simple Field Parser, roughly emulating "awk '{print $<field-number>}'"
//! # fip
//! A simple field parser, roughly emulating `awk '{print $<field-number>}'`.

use std::io::{self, BufRead, Write};

use clap::Parser;

#[derive(Parser)]
#[command(
    author = "Bruschkov",
    version = "1.0.2",
    about = "A Simple Field Parser, roughly emulating \"awk '{print $<field-number>}'\".",
    after_help = "EXAMPLES:
        Get Process IDs from ps aux: \"ps aux | fip 1\"
        Get the values of the third field from a .csv-file: \"fip -s, 2 < file.csv\"
    "
)]
struct Args {
    #[clap(required = true, help = "field number (0-indexed)")]
    field_number: String,
    #[clap(short, long, help = "field separator (Char)", default_value_t = ' ')]
    separator: char,
}

/// Reads standard input line by line and prints the requested field of each
/// line to standard output, one field per line.
///
/// Exits with status 2 when the field number is not a non-negative integer and
/// with status 1 on a read or write failure. A closed output pipe (for example
/// `fip 1 | head`) ends the program quietly.
fn main() {
    let args = Args::parse();
    let field_number = match args.field_number.parse::<usize>() {
        Ok(number) => number,
        Err(error) => {
            eprintln!(
                "fip: invalid field number '{}': {}",
                args.field_number, error
            );
            std::process::exit(2);
        }
    };

    let mut input_handle = io::stdin().lock();
    let mut output_handle = io::stdout().lock();
    // One buffer reused for every line to avoid a fresh allocation per record.
    let mut line = String::new();

    loop {
        match input_handle.read_line(&mut line) {
            // Zero bytes read means end of input.
            Ok(0) => break,
            Ok(_) => {
                // `read_line` keeps the line terminator. Drop it (`\n` or `\r\n`)
                // so the last field of a non-space-separated line does not carry
                // it into the output, where `writeln!` would add a second newline.
                let record = match line.strip_suffix('\n') {
                    Some(rest) => rest.strip_suffix('\r').unwrap_or(rest),
                    None => line.as_str(),
                };
                let field = find_nth_field(record, args.separator, field_number);
                if let Err(error) = writeln!(output_handle, "{}", field) {
                    // The reader went away; nothing more to do and not an error
                    // worth reporting.
                    if error.kind() == io::ErrorKind::BrokenPipe {
                        return;
                    }
                    eprintln!("fip: could not write output: {}", error);
                    std::process::exit(1);
                }
                line.clear();
            }
            Err(error) => {
                eprintln!("fip: could not read line: {}", error);
                std::process::exit(1);
            }
        }
    }
}

/// Returns the `field_number`-th (0-indexed) field of `line`, or `""` when the
/// line has fewer fields than that.
///
/// With `' '` as the separator, leading and trailing whitespace is trimmed from
/// the line and every run of consecutive spaces counts as a single separator,
/// so no empty fields are produced. With any other separator the line is split
/// verbatim: nothing is trimmed and adjacent separators delimit empty fields.
///
/// Splitting works on whole characters, so separators that occupy more than
/// one byte in UTF-8 (for example `'§'`) are handled correctly.
///
/// # Examples
/// ```ignore
/// let result = find_nth_field("this is a test", ' ', 1);
/// assert_eq!(result, "is");
/// ```
fn find_nth_field(line: &str, separator: char, field_number: usize) -> &str {
    let field = if separator == ' ' {
        line.trim()
            .split(' ')
            // Empty pieces are the gaps inside a run of spaces, not fields.
            .filter(|piece| !piece.is_empty())
            .nth(field_number)
            // A field may still start with non-space whitespace such as a tab.
            .map(str::trim_start)
    } else {
        line.split(separator).nth(field_number)
    };
    field.unwrap_or("")
}

// -----------------------------------------------------------------

#[cfg(test)]
mod tests {
    use super::find_nth_field;

    // --- space separator: the line is trimmed and runs of spaces collapse ---

    #[test]
    fn should_parse_field_in_the_middle_of_line() {
        assert_eq!(find_nth_field("this is a test", ' ', 1), "is");
    }

    #[test]
    fn should_ignore_whitespaces_at_beginning_of_line() {
        assert_eq!(find_nth_field("   this is a test", ' ', 1), "is");
    }

    #[test]
    fn should_count_multiple_whitespaces_as_single_separator() {
        assert_eq!(find_nth_field("   this   is a test", ' ', 1), "is");
    }

    #[test]
    fn should_parse_field_at_the_end_of_line() {
        assert_eq!(find_nth_field("this is a test", ' ', 3), "test");
    }

    #[test]
    fn should_return_empty_string_if_field_number_higher_than_fields_in_string() {
        // Only indices 0..=3 exist in this line.
        assert_eq!(find_nth_field("this is a test", ' ', 4), "");
    }

    #[test]
    fn should_return_empty_string_in_case_of_empty_line() {
        assert_eq!(find_nth_field("", ' ', 4), "");
    }

    // --- non-space separator: nothing is trimmed, empty fields are counted ---

    #[test]
    fn should_not_trim_whitespace_with_non_space_separator() {
        assert_eq!(find_nth_field("  a,bc,d", ',', 0), "  a");
    }

    #[test]
    fn should_count_empty_fields_with_non_space_separator() {
        assert_eq!(find_nth_field(",,a,b", ',', 2), "a");
    }

    #[test]
    fn should_return_empty_first_field_with_non_space_separator() {
        assert_eq!(find_nth_field(",,a,b", ',', 0), "");
    }

    #[test]
    fn should_return_empty_last_field_with_non_space_separator() {
        // Index 4 is one past the last field ("b" is index 3).
        assert_eq!(find_nth_field(",,a,b", ',', 4), "");
    }
}