use super::*;
/// Options that control how a single input line is decoded and split by `read_model`.
#[derive(Debug, Clone, Copy)]
pub(super) struct ReadConfig<'a> {
/// Field separator characters. An empty string disables splitting: the whole
/// line becomes the first field.
pub(super) ifs: &'a str,
/// When `true`, backslashes are kept as ordinary characters instead of being
/// decoded as escapes.
pub(super) raw_mode: bool,
/// Number of fields to produce. Zero yields an empty field list.
pub(super) var_count: usize,
}
/// One logical input line as assembled by `read_stdin_input`, plus the flags that
/// describe how reading it ended.
#[derive(Debug, Clone, PartialEq, Eq)]
pub(super) struct ReadInput {
/// Line text. In non-raw mode, backslash-continued physical lines have already
/// been joined and the joining backslashes removed.
pub(super) line: String,
/// Newline status reported for the last physical line that was read; forced to
/// `false` when input ran out while a continuation was pending.
pub(super) terminated_by_newline: bool,
/// `true` when the line ended with a continuation backslash but no further
/// physical line could be read.
pub(super) eof_after_continuation: bool,
/// Only meaningful together with `eof_after_continuation`: records whether the
/// physical line carrying that final backslash was itself newline-terminated.
pub(super) eof_after_newline_continuation: bool,
}
/// Outcome of `read_model`: an exit status and the split fields.
#[derive(Debug, Clone, PartialEq, Eq)]
pub(super) struct ReadResult {
/// `0` when the input was terminated by a newline, `1` otherwise.
pub(super) status: i32,
/// The split field values; empty when no fields were requested.
pub(super) fields: Vec<String>,
}
/// A single decoded input character together with its escape status.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
struct ReadChar {
/// The character itself (for an escaped character, the one after the backslash).
ch: char,
/// `true` when the character was preceded by a backslash in non-raw mode.
/// Escaped characters are never treated as field delimiters.
escaped: bool,
}
/// Cheap, copyable view over the IFS string used to classify characters as
/// separators.
#[derive(Clone, Copy)]
struct IfsClassifier<'a> {
/// The separator characters, borrowed from the caller's configuration.
ifs: &'a str,
}
/// States of the small state machine that `parse_field` uses to extract one field.
#[derive(Clone, Copy)]
enum ParserState {
/// Still before the field body: leading unescaped IFS whitespace is skipped.
AtFieldStart,
/// Inside the field body: characters are collected until an unescaped IFS
/// character is found.
InField,
}
impl<'a> IfsClassifier<'a> {
fn new(ifs: &'a str) -> Self {
Self { ifs }
}
fn is_any(self, ch: char) -> bool {
self.ifs.contains(ch)
}
fn is_whitespace(self, ch: char) -> bool {
matches!(ch, ' ' | '\t' | '\n') && self.ifs.contains(ch)
}
}
/// Reads one logical line from `stdin_fd` for the `read` builtin.
///
/// Returns `Ok(None)` when the first physical read produces nothing. In raw mode
/// the first physical line is returned as-is. Otherwise, as long as the text
/// gathered so far ends in an unescaped backslash, that backslash is dropped and
/// the next physical line is appended. If no further line is available while a
/// continuation is pending, the EOF flags on the returned [`ReadInput`] record
/// that, and `terminated_by_newline` is reported as `false`.
///
/// # Errors
///
/// Propagates any I/O error reported by the underlying physical-line read.
pub(super) fn read_stdin_input(
    stdin_fd: sys::FileDescriptor,
    raw_mode: bool,
) -> Result<Option<ReadInput>, io::Error> {
    let (mut line, mut terminated_by_newline) = match read_physical_line(stdin_fd)? {
        Some(first) => first,
        None => return Ok(None),
    };
    let mut eof_after_continuation = false;
    let mut eof_after_newline_continuation = false;

    // Raw mode never joins lines, so the loop is skipped entirely in that case.
    while !raw_mode && ends_with_unescaped_backslash(&line) {
        // Remember how the line holding the backslash ended before it is overwritten.
        let backslash_line_had_newline = terminated_by_newline;
        // Drop the continuation backslash itself.
        line.pop();
        if let Some((continuation, continuation_had_newline)) = read_physical_line(stdin_fd)? {
            line.push_str(&continuation);
            terminated_by_newline = continuation_had_newline;
        } else {
            terminated_by_newline = false;
            eof_after_continuation = true;
            eof_after_newline_continuation = backslash_line_had_newline;
            break;
        }
    }

    Ok(Some(ReadInput {
        line,
        terminated_by_newline,
        eof_after_continuation,
        eof_after_newline_continuation,
    }))
}
pub(super) fn read_model(input: ReadInput, cfg: ReadConfig<'_>) -> ReadResult {
let status = if input.terminated_by_newline { 0 } else { 1 };
if cfg.var_count == 0 {
return ReadResult {
status,
fields: Vec::new(),
};
}
let chars = if cfg.raw_mode {
input
.line
.chars()
.map(|ch| ReadChar { ch, escaped: false })
.collect()
} else {
decode_read_chars(&input.line)
};
let fields = split_fields(
&chars,
cfg.ifs,
cfg.var_count,
input.terminated_by_newline,
input.eof_after_continuation,
input.eof_after_newline_continuation,
);
ReadResult { status, fields }
}
/// Reads one physical line from `stdin_fd` by delegating to
/// `read_line_with_status`.
///
/// NOTE(review): judging by how `read_stdin_input` consumes the result, the tuple
/// is presumably `(line text without its newline, whether a newline terminated
/// it)` and `None` means no more input — confirm against `read_line_with_status`.
fn read_physical_line(stdin_fd: sys::FileDescriptor) -> Result<Option<(String, bool)>, io::Error> {
stdin_fd.read_line_with_status()
}
/// Reports whether `line` ends with a backslash that is not itself escaped,
/// i.e. whether the run of trailing backslashes has odd length.
fn ends_with_unescaped_backslash(line: &str) -> bool {
    // A backslash is a single byte in UTF-8, so the byte-length difference equals
    // the number of trailing backslashes.
    let without_trailing = line.trim_end_matches('\\');
    let trailing_backslashes = line.len() - without_trailing.len();
    trailing_backslashes % 2 != 0
}
/// Decodes backslash escapes in `line`.
///
/// A backslash marks the following character as escaped and is itself removed.
/// A backslash with nothing after it is dropped. All other characters are
/// emitted unescaped.
fn decode_read_chars(line: &str) -> Vec<ReadChar> {
    let mut decoded = Vec::with_capacity(line.len());
    // Set after an escaping backslash has been seen; applies to the next character.
    let mut escape_pending = false;
    for ch in line.chars() {
        if !escape_pending && ch == '\\' {
            escape_pending = true;
            continue;
        }
        decoded.push(ReadChar {
            ch,
            escaped: escape_pending,
        });
        escape_pending = false;
    }
    decoded
}
/// Splits decoded input into exactly `field_count` fields.
///
/// * `field_count == 0` yields an empty list.
/// * An empty `ifs` puts the entire input into the first field and leaves the
///   remaining fields empty.
/// * Otherwise leading unescaped IFS whitespace is skipped, every field except the
///   last is parsed up to its delimiter, and the last field receives the rest of
///   the input with trailing separators trimmed by `trim_trailing_ifs`. Fields for
///   which no input remains are empty strings.
fn split_fields(
    chars: &[ReadChar],
    ifs: &str,
    field_count: usize,
    terminated_by_newline: bool,
    eof_after_continuation: bool,
    eof_after_newline_continuation: bool,
) -> Vec<String> {
    if field_count == 0 {
        return Vec::new();
    }
    if ifs.is_empty() {
        return fill_fields(vec![chars_to_string(chars)], field_count);
    }

    let ifs = IfsClassifier::new(ifs);
    // When the input holds more fields than requested, the last field keeps its
    // trailing non-whitespace separators.
    let has_surplus_fields = count_logical_fields(chars, ifs) > field_count;
    let last_field = field_count - 1;

    let mut cursor = skip_ifs_whitespace(chars, 0, ifs);
    let mut fields = Vec::with_capacity(field_count);
    for field_idx in 0..field_count {
        let field = if cursor >= chars.len() {
            String::new()
        } else if field_idx == last_field {
            let remainder = trim_trailing_ifs(
                &chars[cursor..],
                ifs,
                has_surplus_fields,
                terminated_by_newline,
                eof_after_continuation,
                eof_after_newline_continuation,
            );
            chars_to_string(remainder)
        } else {
            let (parsed, next_cursor) = parse_field(chars, cursor, ifs);
            cursor = next_cursor;
            parsed
        };
        fields.push(field);
    }
    fields
}
/// Pads `fields` with empty strings until it holds at least `field_count`
/// entries. A list that is already long enough is returned unchanged.
fn fill_fields(mut fields: Vec<String>, field_count: usize) -> Vec<String> {
    // Guard so that `resize` can only grow the list, never truncate it.
    if fields.len() < field_count {
        fields.resize(field_count, String::new());
    }
    fields
}
/// Extracts one field from `chars`, beginning at `start`.
///
/// Leading unescaped IFS whitespace is skipped, then characters are collected
/// until an unescaped IFS character is met. Returns the field text and the index
/// at which the next field starts: just past the delimiter, or the end position
/// if the input ran out first.
fn parse_field(chars: &[ReadChar], start: usize, ifs: IfsClassifier<'_>) -> (String, usize) {
    let mut field = String::new();
    let mut state = ParserState::AtFieldStart;
    let mut pos = start;

    while let Some(&current) = chars.get(pos) {
        let unescaped = !current.escaped;
        state = match state {
            ParserState::AtFieldStart => {
                if unescaped && ifs.is_whitespace(current.ch) {
                    pos += 1;
                    ParserState::AtFieldStart
                } else {
                    // Re-examine the same character as part of the field body.
                    ParserState::InField
                }
            }
            ParserState::InField => {
                if unescaped && ifs.is_any(current.ch) {
                    return (field, consume_delimiter(chars, pos, ifs));
                }
                field.push(current.ch);
                pos += 1;
                ParserState::InField
            }
        };
    }

    (field, pos)
}
/// Returns the index just past the field delimiter that begins at `idx`.
///
/// `idx` must point at the first (unescaped IFS) character of the delimiter.
/// Following POSIX field splitting, one delimiter is either a run of IFS
/// whitespace, or a single non-whitespace IFS character together with any IFS
/// whitespace adjacent to it on either side. Escaped characters never belong to
/// a delimiter.
///
/// A delimiter that starts with IFS whitespace therefore also swallows one
/// following non-whitespace IFS character (and the whitespace after it), so that
/// `a : b` with `IFS=" :"` is two fields rather than three.
fn consume_delimiter(chars: &[ReadChar], idx: usize, ifs: IfsClassifier<'_>) -> usize {
    let is_whitespace_at =
        |pos: usize| matches!(chars.get(pos), Some(c) if !c.escaped && ifs.is_whitespace(c.ch));
    let is_separator_at =
        |pos: usize| matches!(chars.get(pos), Some(c) if !c.escaped && ifs.is_any(c.ch));

    let started_on_whitespace = is_whitespace_at(idx);

    // The character at `idx` is always consumed, followed by any IFS whitespace.
    let mut next = idx + 1;
    while is_whitespace_at(next) {
        next += 1;
    }

    // After the whitespace run, any separator found here is necessarily a
    // non-whitespace one. It belongs to the same delimiter only when no
    // non-whitespace separator has been consumed yet.
    if started_on_whitespace && is_separator_at(next) {
        next += 1;
        while is_whitespace_at(next) {
            next += 1;
        }
    }
    next
}
/// Returns the first index at or after `idx` that does not hold an unescaped IFS
/// whitespace character. An `idx` beyond the end of `chars` is returned unchanged.
fn skip_ifs_whitespace(chars: &[ReadChar], idx: usize, ifs: IfsClassifier<'_>) -> usize {
    let skipped = chars
        .get(idx..)
        .unwrap_or(&[])
        .iter()
        .take_while(|read_char| !read_char.escaped && ifs.is_whitespace(read_char.ch))
        .count();
    idx + skipped
}
/// Counts how many fields `chars` contains under the given separators.
///
/// Leading unescaped IFS whitespace is ignored. Each field then runs up to the
/// next unescaped IFS character, and the delimiter after it is consumed with
/// `consume_delimiter`. A delimiter that ends the input does not open another
/// (empty) field.
fn count_logical_fields(chars: &[ReadChar], ifs: IfsClassifier<'_>) -> usize {
    let mut fields = 0;
    let mut pos = skip_ifs_whitespace(chars, 0, ifs);

    while pos < chars.len() {
        fields += 1;
        // Length of the field body: escaped characters and non-IFS characters.
        let body_len = chars[pos..]
            .iter()
            .take_while(|read_char| read_char.escaped || !ifs.is_any(read_char.ch))
            .count();
        pos += body_len;
        // Anything left at `pos` is the unescaped IFS character that ended the body.
        if pos < chars.len() {
            pos = consume_delimiter(chars, pos, ifs);
        }
    }

    fields
}
/// Trims trailing separators from the text destined for the last field.
///
/// Rules, applied in order:
///
/// 1. If input ended right after a continuation backslash whose line was *not*
///    newline-terminated, nothing is trimmed.
/// 2. Otherwise the trailing run of IFS whitespace characters (escaped or not) is
///    examined. If that run starts with escaped characters, has no escaped
///    character after its first unescaped one, and no unescaped IFS whitespace
///    occurs earlier in `chars`, the escaped prefix is kept and only the rest of
///    the run is removed.
/// 3. In every other case all trailing IFS whitespace is removed, together with
///    trailing unescaped non-whitespace IFS characters unless
///    `preserve_non_whitespace_ifs` is set.
///
/// `_terminated_by_newline` is currently unused; it is kept so the signature
/// stays stable for callers.
///
/// Compared with the previous revision this drops logic that could never take
/// effect: a second check of the rule-1 condition that was unreachable after the
/// early return, a "preceding character is IFS whitespace" test that was always
/// false (the character before the trailing run is by construction not IFS
/// whitespace), and an "all escaped" special case fully covered by rule 2.
/// Results are unchanged.
fn trim_trailing_ifs<'a>(
    chars: &'a [ReadChar],
    ifs: IfsClassifier<'_>,
    preserve_non_whitespace_ifs: bool,
    _terminated_by_newline: bool,
    eof_after_continuation: bool,
    eof_after_newline_continuation: bool,
) -> &'a [ReadChar] {
    if eof_after_continuation && !eof_after_newline_continuation {
        return chars;
    }

    // Index where the trailing run of IFS whitespace (escaped or not) begins.
    let cluster_start = chars
        .iter()
        .rposition(|read_char| !ifs.is_whitespace(read_char.ch))
        .map_or(0, |last_non_whitespace| last_non_whitespace + 1);
    let (head, cluster) = chars.split_at(cluster_start);

    let escaped_prefix_len = cluster
        .iter()
        .take_while(|read_char| read_char.escaped)
        .count();
    if escaped_prefix_len > 0 {
        let escaped_after_unescaped = cluster[escaped_prefix_len..]
            .iter()
            .any(|read_char| read_char.escaped);
        let unescaped_whitespace_in_head = head
            .iter()
            .any(|read_char| !read_char.escaped && ifs.is_whitespace(read_char.ch));
        if !escaped_after_unescaped && !unescaped_whitespace_in_head {
            return &chars[..cluster_start + escaped_prefix_len];
        }
    }

    let mut end = chars.len();
    while let Some(last) = chars[..end].last() {
        // Note: IFS whitespace is trimmed here even when it is escaped.
        let trimmable = ifs.is_whitespace(last.ch)
            || (!preserve_non_whitespace_ifs && !last.escaped && ifs.is_any(last.ch));
        if !trimmable {
            break;
        }
        end -= 1;
    }
    &chars[..end]
}
/// Rebuilds plain text from decoded characters, discarding the escape markers.
fn chars_to_string(chars: &[ReadChar]) -> String {
    let mut text = String::with_capacity(chars.len());
    for read_char in chars {
        text.push(read_char.ch);
    }
    text
}