#![allow(unused)]
use std::ops::Range;
pub fn write_tab(buf: &mut String) {
buf.push('\t');
}
pub fn write_newline(buf: &mut String) {
buf.push('\n');
}
pub fn write_content(buf: &mut String, mut item: &str) {
if item.is_empty() {
item = " ";
}
buf.reserve(item.len());
for char in item.chars() {
match char {
'\t' => buf.push_str(r"\t"),
'\n' => buf.push_str(r"\n"),
'\r' => buf.push_str(r"\r"),
'\\' => buf.push_str(r"\\"),
_ => buf.push(char),
}
}
}
pub struct ParsedTsv {
data: String,
cell_spans: Vec<Range<u32>>,
row_offsets: Vec<u32>,
}
impl ParsedTsv {
pub fn parse(data: &str) -> Self {
#[derive(Clone, Copy)]
enum ParseState {
Empty,
Escaping,
}
let mut s = Self {
data: Default::default(),
cell_spans: Default::default(),
row_offsets: Default::default(),
};
let mut state = ParseState::Empty;
let mut cell_start_char = 0;
s.row_offsets.push(0);
for char in data.chars() {
match state {
ParseState::Empty => match char {
'\n' | '\t' => {
if char == '\t' || cell_start_char != s.data.len() as u32 {
s.cell_spans.push(cell_start_char..s.data.len() as u32);
cell_start_char = s.data.len() as _;
}
if char == '\n' {
s.row_offsets.push(s.cell_spans.len() as _);
}
}
'\r' => {
}
'\\' => state = ParseState::Escaping,
ch => s.data.push(ch),
},
ParseState::Escaping => {
match char {
't' => s.data.push('\t'),
'n' => s.data.push('\n'),
'r' => s.data.push('\r'),
'\\' => s.data.push('\\'),
ch => {
s.data.push('\\');
s.data.push(ch);
}
}
state = ParseState::Empty;
}
}
}
{
if cell_start_char != s.data.len() as u32 {
s.cell_spans.push(cell_start_char..s.data.len() as u32);
}
if *s.row_offsets.last().unwrap() != s.cell_spans.len() as u32 {
s.row_offsets.push(s.cell_spans.len() as _);
}
}
s.data.shrink_to_fit();
s.cell_spans.shrink_to_fit();
s.row_offsets.shrink_to_fit();
s
}
pub fn calc_table_width(&self) -> usize {
self.row_offsets
.windows(2)
.map(|range| range[1] - range[0])
.max()
.unwrap_or(0) as usize
}
pub fn num_columns_at(&self, row: usize) -> usize {
if row >= self.row_offsets.len() - 1 {
return 0;
}
let start = self.row_offsets[row] as usize;
let end = self.row_offsets[row + 1] as usize;
end - start
}
pub fn num_rows(&self) -> usize {
self.row_offsets.len() - 1
}
pub fn get_cell(&self, row: usize, column: usize) -> Option<&str> {
let row_offset = *self.row_offsets.get(row)? as usize;
let cell_span = self.cell_spans.get(row_offset + column)?;
Some(&self.data[cell_span.start as usize..cell_span.end as usize])
}
pub fn iter_rows(&self) -> impl Iterator<Item = (usize, impl Iterator<Item = (usize, &str)>)> {
self.row_offsets
.windows(2)
.enumerate()
.map(move |(row, range)| {
let (start, end) = (range[0] as usize, range[1] as usize);
let row_iter = (start..end).map(move |cell_offset| {
let cell_span = self.cell_spans.get(cell_offset).unwrap();
(
cell_offset - start,
&self.data[cell_span.start as usize..cell_span.end as usize],
)
});
(row, row_iter)
})
}
#[cfg(test)]
fn iter_index_data(&self) -> impl Iterator<Item = (usize, usize, &str)> {
self.iter_rows()
.flat_map(|(row, row_iter)| row_iter.map(move |(col, data)| (row, col, data)))
}
}
#[test]
fn tsv_parsing() {
const TSV_DATA: &str = "Hello\tWorld\nThis\tIs\tA\tTest";
let parsed = ParsedTsv::parse(TSV_DATA);
assert_eq!(parsed.num_columns_at(0), 2);
assert_eq!(parsed.num_columns_at(1), 4);
assert_eq!(parsed.num_columns_at(2), 0);
assert_eq!(parsed.num_rows(), 2);
assert_eq!(parsed.get_cell(0, 0), Some("Hello"));
assert_eq!(parsed.get_cell(0, 1), Some("World"));
assert_eq!(parsed.get_cell(1, 0), Some("This"));
assert_eq!(parsed.get_cell(1, 1), Some("Is"));
assert_eq!(parsed.get_cell(1, 2), Some("A"));
assert_eq!(parsed.get_cell(1, 3), Some("Test"));
assert!(parsed.get_cell(1, 4).is_none());
assert_eq!(
parsed.iter_index_data().collect::<Vec<_>>(),
vec![
(0, 0, "Hello"),
(0, 1, "World"),
(1, 0, "This"),
(1, 1, "Is"),
(1, 2, "A"),
(1, 3, "Test"),
]
);
}