// mxsh 0.2.0
//
// Embeddable POSIX-style shell parser and runtime
// Documentation
use super::*;

/// Settings that control how `read_model` turns one input line into fields.
#[derive(Debug, Clone, Copy)]
pub(super) struct ReadConfig<'a> {
    /// Characters treated as field separators. An empty string disables
    /// splitting: the whole line goes into the first field.
    pub(super) ifs: &'a str,
    /// When `true`, backslashes are kept as ordinary characters instead of
    /// being decoded as escapes.
    pub(super) raw_mode: bool,
    /// Number of fields to produce; `0` yields no fields at all.
    pub(super) var_count: usize,
}

/// One logical input line together with how reading it ended.
#[derive(Debug, Clone, PartialEq, Eq)]
pub(super) struct ReadInput {
    /// The line text. Outside raw mode, `read_stdin_input` has already joined
    /// backslash-continued physical lines and removed the joining backslashes.
    pub(super) line: String,
    /// Newline flag reported for the last physical line that was read;
    /// `read_model` maps `true` to status `0` and `false` to status `1`.
    pub(super) terminated_by_newline: bool,
    /// Set when a continuation backslash was followed by end of input.
    pub(super) eof_after_continuation: bool,
    /// When `eof_after_continuation` is set: the newline flag of the physical
    /// line that carried the final continuation backslash.
    pub(super) eof_after_newline_continuation: bool,
}

/// Outcome of `read_model`: an exit status plus the split fields.
#[derive(Debug, Clone, PartialEq, Eq)]
pub(super) struct ReadResult {
    /// `0` when the input was newline-terminated, `1` otherwise.
    pub(super) status: i32,
    /// One entry per requested field; empty when no fields were requested.
    pub(super) fields: Vec<String>,
}

/// A single input character tagged with whether a backslash escaped it.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
struct ReadChar {
    /// The character itself (the escaping backslash is not stored).
    ch: char,
    /// `true` when the character followed a backslash in non-raw mode; escaped
    /// characters are never treated as field delimiters while splitting.
    escaped: bool,
}

/// Cheap, copyable view over the `IFS` value used to classify characters.
#[derive(Clone, Copy)]
struct IfsClassifier<'a> {
    /// The separator characters, exactly as supplied by the caller.
    ifs: &'a str,
}

/// State of the per-field scanner in `parse_field`.
#[derive(Clone, Copy)]
enum ParserState {
    /// Still before the field's first character; unescaped IFS whitespace is
    /// skipped in this state.
    AtFieldStart,
    /// Collecting characters until an unescaped IFS character is found.
    InField,
}

impl<'a> IfsClassifier<'a> {
    fn new(ifs: &'a str) -> Self {
        Self { ifs }
    }

    fn is_any(self, ch: char) -> bool {
        self.ifs.contains(ch)
    }

    fn is_whitespace(self, ch: char) -> bool {
        matches!(ch, ' ' | '\t' | '\n') && self.ifs.contains(ch)
    }
}

/// Reads one logical line for `read` from `stdin_fd`.
///
/// Returns `Ok(None)` when the very first physical read yields nothing.
/// In raw mode the first physical line is returned untouched. Otherwise,
/// while the accumulated text ends in an unescaped backslash, that backslash
/// is removed and the next physical line is appended. If the input ends in
/// the middle of such a continuation, the EOF flags on the returned
/// `ReadInput` record that fact.
pub(super) fn read_stdin_input(
    stdin_fd: sys::FileDescriptor,
    raw_mode: bool,
) -> Result<Option<ReadInput>, io::Error> {
    let (mut line, mut terminated_by_newline) = match read_physical_line(stdin_fd)? {
        Some(first) => first,
        None => return Ok(None),
    };

    let mut eof_after_continuation = false;
    let mut eof_after_newline_continuation = false;

    // Raw mode never joins lines, so the loop is skipped entirely.
    while !raw_mode && ends_with_unescaped_backslash(&line) {
        // Drop the continuation backslash before looking for more input.
        line.pop();
        let Some((next, next_terminated_by_newline)) = read_physical_line(stdin_fd)? else {
            // Input ended right after the continuation: remember whether the
            // continued line itself had been newline-terminated.
            eof_after_continuation = true;
            eof_after_newline_continuation = terminated_by_newline;
            terminated_by_newline = false;
            break;
        };
        line.push_str(&next);
        terminated_by_newline = next_terminated_by_newline;
    }

    Ok(Some(ReadInput {
        line,
        terminated_by_newline,
        eof_after_continuation,
        eof_after_newline_continuation,
    }))
}

pub(super) fn read_model(input: ReadInput, cfg: ReadConfig<'_>) -> ReadResult {
    let status = if input.terminated_by_newline { 0 } else { 1 };
    if cfg.var_count == 0 {
        return ReadResult {
            status,
            fields: Vec::new(),
        };
    }

    let chars = if cfg.raw_mode {
        input
            .line
            .chars()
            .map(|ch| ReadChar { ch, escaped: false })
            .collect()
    } else {
        decode_read_chars(&input.line)
    };
    let fields = split_fields(
        &chars,
        cfg.ifs,
        cfg.var_count,
        input.terminated_by_newline,
        input.eof_after_continuation,
        input.eof_after_newline_continuation,
    );

    ReadResult { status, fields }
}

/// Reads one physical line from `stdin_fd` by delegating to
/// `read_line_with_status`.
///
/// NOTE(review): callers in this file treat `Ok(None)` as "no more input" and
/// the `bool` as "the line ended with a newline"; confirm against the
/// `sys::FileDescriptor` implementation.
fn read_physical_line(stdin_fd: sys::FileDescriptor) -> Result<Option<(String, bool)>, io::Error> {
    stdin_fd.read_line_with_status()
}

/// Returns `true` when `line` ends in an odd-length run of backslashes, i.e.
/// the final backslash is not itself escaped by the one before it.
fn ends_with_unescaped_backslash(line: &str) -> bool {
    // A backslash is a single byte, so the byte-length difference equals the
    // number of trailing backslashes.
    let without_run = line.trim_end_matches('\\');
    let run_len = line.len() - without_run.len();
    run_len % 2 != 0
}

/// Decodes backslash escapes in `line`.
///
/// Each backslash is dropped and the character after it is emitted with
/// `escaped: true`; every other character is emitted with `escaped: false`.
/// A backslash that is the last character of `line` produces nothing.
fn decode_read_chars(line: &str) -> Vec<ReadChar> {
    let mut decoded = Vec::with_capacity(line.len());
    let mut escape_pending = false;
    for ch in line.chars() {
        if escape_pending {
            decoded.push(ReadChar { ch, escaped: true });
            escape_pending = false;
        } else if ch == '\\' {
            escape_pending = true;
        } else {
            decoded.push(ReadChar { ch, escaped: false });
        }
    }
    decoded
}

/// Splits `chars` into exactly `field_count` strings using `ifs`.
///
/// * `field_count == 0` yields an empty vector.
/// * An empty `ifs` puts the whole line into the first field and pads the
///   rest with empty strings.
/// * Otherwise leading IFS whitespace is skipped, every field but the last is
///   parsed up to its delimiter, and the last field receives the remaining
///   text after trailing-delimiter trimming. Once the input is exhausted the
///   remaining fields are empty.
fn split_fields(
    chars: &[ReadChar],
    ifs: &str,
    field_count: usize,
    terminated_by_newline: bool,
    eof_after_continuation: bool,
    eof_after_newline_continuation: bool,
) -> Vec<String> {
    if field_count == 0 {
        return Vec::new();
    }
    if ifs.is_empty() {
        return fill_fields(vec![chars_to_string(chars)], field_count);
    }

    let classifier = IfsClassifier::new(ifs);
    // More logical fields than variables means trailing non-whitespace
    // delimiters in the last field must be kept.
    let has_surplus_fields = count_logical_fields(chars, classifier) > field_count;
    let last_slot = field_count - 1;
    let mut cursor = skip_ifs_whitespace(chars, 0, classifier);
    let mut out = Vec::with_capacity(field_count);

    for slot in 0..field_count {
        let value = if cursor >= chars.len() {
            String::new()
        } else if slot == last_slot {
            chars_to_string(trim_trailing_ifs(
                &chars[cursor..],
                classifier,
                has_surplus_fields,
                terminated_by_newline,
                eof_after_continuation,
                eof_after_newline_continuation,
            ))
        } else {
            let (field, resume_at) = parse_field(chars, cursor, classifier);
            cursor = resume_at;
            field
        };
        out.push(value);
    }

    out
}

/// Pads `fields` with empty strings until it holds at least `field_count`
/// entries; a vector that is already long enough is returned unchanged.
fn fill_fields(mut fields: Vec<String>, field_count: usize) -> Vec<String> {
    if fields.len() < field_count {
        fields.resize(field_count, String::new());
    }
    fields
}

/// Parses one field beginning at `start`.
///
/// Unescaped IFS whitespace before the field is skipped, then characters are
/// collected until an unescaped IFS character is found. Returns the field
/// text and the index at which scanning should resume (past the delimiter,
/// or the end of input when no delimiter was found).
fn parse_field(chars: &[ReadChar], start: usize, ifs: IfsClassifier<'_>) -> (String, usize) {
    let mut field = String::new();
    let mut cursor = start;
    let mut state = ParserState::AtFieldStart;

    while let Some(&current) = chars.get(cursor) {
        let unescaped = !current.escaped;
        state = match state {
            ParserState::AtFieldStart => {
                if unescaped && ifs.is_whitespace(current.ch) {
                    cursor += 1;
                    ParserState::AtFieldStart
                } else {
                    // Re-examine the same character as field content.
                    ParserState::InField
                }
            }
            ParserState::InField => {
                if unescaped && ifs.is_any(current.ch) {
                    return (field, consume_delimiter(chars, cursor, ifs));
                }
                field.push(current.ch);
                cursor += 1;
                ParserState::InField
            }
        };
    }

    (field, cursor)
}

/// Returns the index just past the field delimiter that begins at `idx`.
///
/// `idx` is expected to point at an unescaped IFS character. The delimiter
/// always includes that character and any unescaped IFS whitespace directly
/// after it. Per POSIX field splitting, a non-whitespace IFS character
/// together with its *adjacent* IFS whitespace forms a single delimiter, so
/// when the delimiter starts on whitespace and the whitespace run is followed
/// by an unescaped non-whitespace IFS character, that character and the
/// whitespace after it are consumed too (`"a : b"` has one delimiter, not
/// two). Escaped characters never take part in a delimiter.
fn consume_delimiter(chars: &[ReadChar], idx: usize, ifs: IfsClassifier<'_>) -> usize {
    let is_ifs_whitespace_at = |at: usize| {
        matches!(chars.get(at), Some(rc) if !rc.escaped && ifs.is_whitespace(rc.ch))
    };
    let is_hard_delimiter_at = |at: usize| {
        matches!(
            chars.get(at),
            Some(rc) if !rc.escaped && ifs.is_any(rc.ch) && !ifs.is_whitespace(rc.ch)
        )
    };

    let started_on_whitespace = is_ifs_whitespace_at(idx);
    let mut next = idx + 1;
    while is_ifs_whitespace_at(next) {
        next += 1;
    }

    // Whitespace that leads into a non-whitespace IFS character belongs to
    // that character's delimiter; without this an empty field would be
    // reported between the whitespace and the character.
    if started_on_whitespace && is_hard_delimiter_at(next) {
        next += 1;
        while is_ifs_whitespace_at(next) {
            next += 1;
        }
    }

    next
}

/// Returns the first index at or after `idx` that is not unescaped IFS
/// whitespace. An `idx` past the end of `chars` is returned unchanged.
fn skip_ifs_whitespace(chars: &[ReadChar], idx: usize, ifs: IfsClassifier<'_>) -> usize {
    let Some(rest) = chars.get(idx..) else {
        return idx;
    };
    let skipped = rest
        .iter()
        .take_while(|rc| !rc.escaped && ifs.is_whitespace(rc.ch))
        .count();
    idx + skipped
}

/// Counts how many fields `chars` contains when split on `ifs`, ignoring any
/// limit on the number of variables.
///
/// Leading IFS whitespace is skipped. Every stretch of text up to the next
/// unescaped IFS character counts as one field (an empty stretch between two
/// delimiters counts too). A delimiter that ends the input does not start
/// another field.
fn count_logical_fields(chars: &[ReadChar], ifs: IfsClassifier<'_>) -> usize {
    let mut cursor = skip_ifs_whitespace(chars, 0, ifs);
    let mut total = 0;

    while cursor < chars.len() {
        total += 1;
        let delimiter_offset = chars[cursor..]
            .iter()
            .position(|rc| !rc.escaped && ifs.is_any(rc.ch));
        match delimiter_offset {
            Some(offset) => cursor = consume_delimiter(chars, cursor + offset, ifs),
            // No further delimiter: the current field runs to the end.
            None => break,
        }
    }

    total
}

/// Trims trailing IFS delimiters from the text destined for the last field.
///
/// * If the input ended right after a continuation backslash whose line was
///   not newline-terminated, `chars` is returned untouched.
/// * A trailing run of IFS whitespace made up entirely of escaped characters
///   is kept, and a leading escaped part of such a run is kept while the
///   unescaped remainder is dropped. Both apply only when no unescaped IFS
///   whitespace occurs earlier in `chars`.
/// * Otherwise trailing IFS whitespace is removed, and trailing unescaped
///   non-whitespace IFS characters are removed as well unless
///   `preserve_non_whitespace_ifs` is set.
///
/// `_terminated_by_newline` is accepted for signature compatibility and is
/// not consulted.
fn trim_trailing_ifs<'a>(
    chars: &'a [ReadChar],
    ifs: IfsClassifier<'_>,
    preserve_non_whitespace_ifs: bool,
    _terminated_by_newline: bool,
    eof_after_continuation: bool,
    eof_after_newline_continuation: bool,
) -> &'a [ReadChar] {
    if eof_after_continuation && !eof_after_newline_continuation {
        return chars;
    }

    // Start of the trailing run of IFS-whitespace characters (escaped or
    // not). The character just before it, if any, is by construction not IFS
    // whitespace, so no extra check on the preceding character is needed.
    let trailing_whitespace_start = chars
        .iter()
        .rposition(|ch| !ifs.is_whitespace(ch.ch))
        .map_or(0, |idx| idx + 1);

    if trailing_whitespace_start < chars.len() {
        let cluster = &chars[trailing_whitespace_start..];
        let saw_unescaped_ifs_whitespace_before_cluster = chars[..trailing_whitespace_start]
            .iter()
            .any(|ch| ifs.is_whitespace(ch.ch) && !ch.escaped);
        let escaped_prefix_len = cluster.iter().take_while(|ch| ch.escaped).count();
        let all_escaped = escaped_prefix_len == cluster.len();
        let has_escaped_after_unescaped = cluster[escaped_prefix_len..].iter().any(|ch| ch.escaped);

        if !saw_unescaped_ifs_whitespace_before_cluster {
            // The whole trailing run was escaped: keep it verbatim.
            if all_escaped {
                return chars;
            }
            // Escaped whitespace followed only by unescaped whitespace: keep
            // the escaped part, drop the rest.
            if escaped_prefix_len > 0 && !has_escaped_after_unescaped {
                return &chars[..trailing_whitespace_start + escaped_prefix_len];
            }
        }
    }

    let mut end = chars.len();
    while end > 0 {
        let read_char = chars[end - 1];
        if ifs.is_whitespace(read_char.ch) {
            end -= 1;
            continue;
        }
        if !preserve_non_whitespace_ifs && ifs.is_any(read_char.ch) && !read_char.escaped {
            end -= 1;
            continue;
        }
        break;
    }
    &chars[..end]
}

/// Concatenates the characters of `chars` into a `String`, discarding the
/// escape markers.
fn chars_to_string(chars: &[ReadChar]) -> String {
    let mut text = String::with_capacity(chars.len());
    for read_char in chars {
        text.push(read_char.ch);
    }
    text
}