use super::index::DsvIndex;
/// Copyable cursor over delimiter-separated text backed by a [`DsvIndex`].
///
/// The cursor itself only stores a byte offset into `text`; field and row
/// boundaries are located through rank/select queries on the index instead of
/// scanning the bytes.
#[derive(Clone, Copy, Debug)]
pub struct DsvCursor<'a> {
    /// Raw input bytes being navigated.
    text: &'a [u8],
    /// Index answering the `markers_*` and `newlines_*` rank/select queries.
    index: &'a DsvIndex,
    /// Current byte offset into `text`; `text.len()` or more means "at end".
    position: usize,
}

impl<'a> DsvCursor<'a> {
    /// Creates a cursor positioned at byte offset 0 of `text`.
    ///
    /// NOTE(review): `index` is presumably built from the same `text`; nothing
    /// here verifies that.
    pub fn new(text: &'a [u8], index: &'a DsvIndex) -> Self {
        Self {
            position: 0,
            text,
            index,
        }
    }

    /// Returns the current byte offset into the text.
    #[inline]
    pub fn position(&self) -> usize {
        self.position
    }

    /// Returns `true` once the cursor has reached (or passed) the end of the text.
    #[inline]
    pub fn at_end(&self) -> bool {
        self.text.len() <= self.position
    }

    /// Moves to the byte just after the next field marker.
    ///
    /// The marker is the one selected by `markers_select1(markers_rank1(position))`,
    /// presumably the first marker at or after the cursor.
    ///
    /// Returns `true` if the cursor now points at a byte inside the text. If the
    /// cursor was already at the end it is left untouched and `false` is
    /// returned; otherwise, when no usable marker exists or stepping past it
    /// reaches the end, the cursor is parked at `text.len()` and `false` is
    /// returned.
    pub fn next_field(&mut self) -> bool {
        if self.at_end() {
            return false;
        }
        let rank = self.index.markers_rank1(self.position);
        let boundary = self.index.markers_select1(rank);
        self.advance_past(boundary)
    }

    /// Moves to the byte just after the next newline.
    ///
    /// Works exactly like [`next_field`](Self::next_field) but consults the
    /// newline queries of the index, so the cursor lands on the start of the
    /// following row.
    pub fn next_row(&mut self) -> bool {
        if self.at_end() {
            return false;
        }
        let rank = self.index.newlines_rank1(self.position);
        let boundary = self.index.newlines_select1(rank);
        self.advance_past(boundary)
    }

    /// Jumps to the first byte of row `n` (0-based).
    ///
    /// Row 0 always starts at offset 0. For any other row the start is one past
    /// the newline returned by `newlines_select1(n - 1)`.
    ///
    /// Returns `true` if the cursor ends up on a byte inside the text. When the
    /// index has no such newline, or the computed start lies beyond
    /// `text.len()`, the cursor is not moved and `false` is returned. A start
    /// equal to `text.len()` does move the cursor there and returns `false`.
    pub fn goto_row(&mut self, n: usize) -> bool {
        let target = match n {
            0 => {
                self.position = 0;
                return !self.text.is_empty();
            }
            _ => match self.index.newlines_select1(n - 1) {
                Some(newline_pos) => newline_pos + 1,
                None => return false,
            },
        };
        if target > self.text.len() {
            return false;
        }
        self.position = target;
        !self.at_end()
    }

    /// Returns the bytes from the cursor up to (not including) the next marker.
    ///
    /// If the index reports no further marker the slice runs to the end of the
    /// text. At the end of the text an empty slice is returned.
    pub fn current_field(&self) -> &'a [u8] {
        if self.at_end() {
            return &[];
        }
        let start = self.position;
        let rank = self.index.markers_rank1(start);
        let end = match self.index.markers_select1(rank) {
            Some(marker_pos) => marker_pos,
            None => self.text.len(),
        };
        &self.text[start..end]
    }

    /// Returns the current field as a `&str`, or the UTF-8 error if the bytes
    /// are not valid UTF-8.
    #[cfg(feature = "std")]
    pub fn current_field_str(&self) -> Result<&'a str, core::str::Utf8Error> {
        let bytes = self.current_field();
        core::str::from_utf8(bytes)
    }

    /// Returns `true` when the newline rank increases between `position - 1`
    /// and `position`, i.e. the byte just before the cursor is indexed as a
    /// newline. Always `false` at offset 0 or beyond `text.len()`.
    fn at_newline(&self) -> bool {
        let pos = self.position;
        if pos == 0 || pos > self.text.len() {
            return false;
        }
        let before = self.index.newlines_rank1(pos - 1);
        let here = self.index.newlines_rank1(pos);
        before < here
    }

    /// Shared tail of `next_field` / `next_row`: steps to one past `boundary`
    /// when it lies inside the text, otherwise parks the cursor at
    /// `text.len()`. Returns whether the cursor still points inside the text.
    fn advance_past(&mut self, boundary: Option<usize>) -> bool {
        let len = self.text.len();
        self.position = match boundary {
            Some(pos) if pos < len => pos + 1,
            _ => len,
        };
        !self.at_end()
    }
}
/// A single row of delimiter-separated data.
///
/// Holds a copy of the cursor it was created from together with the byte offset
/// at which the row begins, so the row's fields can be read repeatedly.
#[derive(Clone, Copy, Debug)]
pub struct DsvRow<'a> {
    /// Cursor supplying the text and index; its own position is not relied on.
    cursor: DsvCursor<'a>,
    /// Byte offset of the first field of this row.
    row_start: usize,
}

impl<'a> DsvRow<'a> {
    /// Builds a row that starts at the cursor's current position.
    pub(crate) fn from_cursor(cursor: DsvCursor<'a>) -> Self {
        Self {
            row_start: cursor.position,
            cursor,
        }
    }

    /// Returns an iterator over the fields of this row, starting from the
    /// row's first field.
    pub fn fields(&self) -> DsvFields<'a> {
        DsvFields {
            cursor: self.cursor_at_start(),
            row_start: self.row_start,
            started: false,
            finished: false,
        }
    }

    /// Returns the bytes of the field at 0-based `column`.
    ///
    /// Returns `None` when the row has no such column: the row starts at the
    /// end of the input, the input ends before the column is reached, or
    /// stepping forward crosses a newline into the next row.
    pub fn get(&self, column: usize) -> Option<&'a [u8]> {
        let mut cursor = self.cursor_at_start();

        for _ in 0..column {
            // `next_field` returns `false` both when already at the end and
            // when the step lands on the end, so no separate `at_end` check or
            // throw-away `current_field` lookup is needed while skipping.
            if !cursor.next_field() || cursor.at_newline() {
                return None;
            }
        }

        // Only reachable at the end when `column == 0` and the row itself
        // starts at the end of the input.
        if cursor.at_end() {
            None
        } else {
            Some(cursor.current_field())
        }
    }

    /// Copy of the stored cursor rewound to the first byte of this row.
    fn cursor_at_start(&self) -> DsvCursor<'a> {
        DsvCursor {
            position: self.row_start,
            ..self.cursor
        }
    }
}
/// Iterator over the rows of delimiter-separated text.
///
/// Yields one [`DsvRow`] per row start, advancing an internal cursor with
/// `DsvCursor::next_row` between items.
#[derive(Clone, Debug)]
pub struct DsvRows<'a> {
    /// Cursor sitting on the start of the most recently yielded row.
    cursor: DsvCursor<'a>,
    /// `false` until the first call to `next`, which yields the row at offset 0
    /// without advancing.
    started: bool,
}

impl<'a> DsvRows<'a> {
    /// Creates a row iterator starting at the beginning of `text`.
    pub fn new(text: &'a [u8], index: &'a DsvIndex) -> Self {
        Self {
            cursor: DsvCursor::new(text, index),
            started: false,
        }
    }
}

impl<'a> Iterator for DsvRows<'a> {
    type Item = DsvRow<'a>;

    /// Returns the next row, or `None` once the cursor has reached the end of
    /// the text (including immediately, for empty input).
    fn next(&mut self) -> Option<Self::Item> {
        let has_row = if self.started {
            self.cursor.next_row()
        } else {
            // The first row begins where the cursor already is; it exists only
            // if there is any text at all.
            self.started = true;
            !self.cursor.at_end()
        };

        if has_row {
            Some(DsvRow::from_cursor(self.cursor))
        } else {
            None
        }
    }
}
/// Iterator over the fields of a single row, yielding each field's raw bytes.
#[derive(Clone, Debug)]
pub struct DsvFields<'a> {
    /// Cursor positioned on the field most recently yielded (or on the row
    /// start before iteration begins).
    cursor: DsvCursor<'a>,
    #[allow(dead_code)]
    row_start: usize,
    /// `false` until the first field has been yielded; the first call reads the
    /// field under the cursor without advancing.
    started: bool,
    /// Set once the last field of the row has been yielded or the row is
    /// otherwise exhausted.
    finished: bool,
}

impl<'a> DsvFields<'a> {
    /// Returns `true` when `field`, which starts at the cursor, is the last
    /// field of its row: it either runs to the end of the text or the byte
    /// right after it raises the newline rank (i.e. is indexed as a newline).
    fn is_last_in_row(&self, field: &[u8]) -> bool {
        let field_end = self.cursor.position + field.len();
        if field_end >= self.cursor.text.len() {
            return true;
        }
        let index = self.cursor.index;
        index.newlines_rank1(field_end + 1) > index.newlines_rank1(field_end)
    }
}

impl<'a> Iterator for DsvFields<'a> {
    type Item = &'a [u8];

    /// Returns the next field of the row, or `None` once the row is exhausted.
    fn next(&mut self) -> Option<Self::Item> {
        if self.finished {
            return None;
        }

        if self.started {
            // Step to the following field; stop if the text ran out or the
            // step crossed a newline into the next row.
            if !self.cursor.next_field() || self.cursor.at_newline() {
                self.finished = true;
                return None;
            }
        } else {
            self.started = true;
        }

        let field = self.cursor.current_field();
        // `finished` is known to be `false` here, so a plain assignment is
        // the same as "set it when this is the last field".
        self.finished = self.is_last_in_row(field);
        Some(field)
    }
}
/// Removes one pair of enclosing double quotes from `field`.
///
/// The input is returned unchanged unless it is at least two bytes long and
/// both its first and last byte are `"`. Quotes inside the field (including
/// doubled `""` escapes) are left as they are.
#[allow(dead_code)]
pub fn strip_quotes(field: &[u8]) -> &[u8] {
    match field {
        // The pattern needs two distinct bytes, so a lone `"` does not match.
        [b'"', inner @ .., b'"'] => inner,
        _ => field,
    }
}
#[cfg(test)]
mod tests {
    use super::super::parser::build_index;
    use super::super::DsvConfig;
    use super::*;

    /// Stepping field by field walks the first row and then crosses into the
    /// second one.
    #[test]
    fn test_cursor_navigation() {
        let csv = b"a,b,c\n1,2,3\n";
        let config = DsvConfig::default();
        let index = build_index(csv, &config);
        let mut cursor = DsvCursor::new(csv, &index);

        assert_eq!(cursor.position(), 0);
        assert_eq!(cursor.current_field(), b"a");

        // The last step moves past the newline onto the first field of row 1.
        let following: [&[u8]; 3] = [b"b", b"c", b"1"];
        for want in following.iter() {
            assert!(cursor.next_field());
            assert_eq!(cursor.current_field(), *want);
        }
    }

    /// `goto_row` lands on the first field of each existing row and reports
    /// failure for a row past the end.
    #[test]
    fn test_goto_row() {
        let csv = b"a,b\nc,d\ne,f\n";
        let config = DsvConfig::default();
        let index = build_index(csv, &config);
        let mut cursor = DsvCursor::new(csv, &index);

        let first_fields: [&[u8]; 3] = [b"a", b"c", b"e"];
        for (row, want) in first_fields.iter().enumerate() {
            assert!(cursor.goto_row(row));
            assert_eq!(cursor.current_field(), *want);
        }

        // Only three rows exist.
        assert!(!cursor.goto_row(3));
    }

    /// Iterating a row yields exactly that row's fields.
    #[test]
    fn test_row_fields() {
        let csv = b"a,b,c\n1,2,3\n";
        let config = DsvConfig::default();
        let index = build_index(csv, &config);
        let row = DsvRow::from_cursor(DsvCursor::new(csv, &index));

        let expected: [&[u8]; 3] = [b"a", b"b", b"c"];
        let fields: Vec<&[u8]> = row.fields().collect();
        assert_eq!(fields, expected.to_vec());
    }

    /// Column lookup returns each field and `None` past the last column.
    #[test]
    fn test_row_get() {
        let csv = b"a,b,c\n";
        let config = DsvConfig::default();
        let index = build_index(csv, &config);
        let row = DsvRow::from_cursor(DsvCursor::new(csv, &index));

        let expected: [&[u8]; 3] = [b"a", b"b", b"c"];
        for (column, want) in expected.iter().enumerate() {
            assert_eq!(row.get(column), Some(*want));
        }
        assert_eq!(row.get(3), None);
    }

    /// Quotes are stripped only when they enclose the whole field.
    #[test]
    fn test_strip_quotes() {
        let cases: [(&[u8], &[u8]); 4] = [
            (&b"\"hello\""[..], &b"hello"[..]),
            (&b"hello"[..], &b"hello"[..]),
            (&b"\"\""[..], &b""[..]),
            (&b"\""[..], &b"\""[..]),
        ];
        for (input, want) in cases.iter() {
            assert_eq!(strip_quotes(input), *want);
        }
    }
}