// Submodules of the DSV implementation; only `simd` is itself public.
mod config;
mod cursor;
mod index;
mod index_lightweight;
mod parser;
pub mod simd;
// Public re-exports of the configuration, cursor/row iteration, and index types.
pub use config::DsvConfig;
pub use cursor::{DsvCursor, DsvFields, DsvRow, DsvRows};
pub use index::DsvIndex;
// On aarch64 and x86_64 targets, `build_index` is `simd::build_index_simd`.
#[cfg(any(target_arch = "aarch64", target_arch = "x86_64"))]
pub use simd::build_index_simd as build_index;
// On every other target architecture, `build_index` is `parser::build_index`.
#[cfg(not(any(target_arch = "aarch64", target_arch = "x86_64")))]
pub use parser::build_index;
// `parser::build_index` is additionally re-exported as `build_index_scalar` on all targets.
pub use parser::build_index as build_index_scalar;
// `Vec` is imported from `alloc` except in test builds.
// NOTE(review): presumably test builds get `Vec` from the std prelude instead — confirm.
#[cfg(not(test))]
use alloc::vec::Vec;
/// Owned DSV document: a copy of the input bytes together with the
/// [`DsvIndex`] built over them.
///
/// Constructed by [`Dsv::parse`] or [`Dsv::parse_with_config`].
#[derive(Clone, Debug)]
pub struct Dsv {
/// Owned copy of the bytes that were parsed.
text: Vec<u8>,
/// Index returned by `build_index` for the parsed bytes.
index: DsvIndex,
}
impl Dsv {
    /// Parses `text` with the default [`DsvConfig`].
    ///
    /// Shorthand for [`Dsv::parse_with_config`] called with `DsvConfig::default()`.
    pub fn parse(text: &[u8]) -> Self {
        let defaults = DsvConfig::default();
        Self::parse_with_config(text, &defaults)
    }

    /// Builds an index over `text` using `config`, then stores that index
    /// together with an owned copy of the bytes.
    pub fn parse_with_config(text: &[u8], config: &DsvConfig) -> Self {
        // Field initialisers run in written order: index first, then the copy.
        Self {
            index: build_index(text, config),
            text: Vec::from(text),
        }
    }

    /// Creates a new [`DsvCursor`] over this document's bytes and index.
    pub fn cursor(&self) -> DsvCursor<'_> {
        let Self { text, index } = self;
        DsvCursor::new(text, index)
    }

    /// Creates a [`DsvRows`] value over this document's bytes and index.
    pub fn rows(&self) -> DsvRows<'_> {
        let Self { text, index } = self;
        DsvRows::new(text, index)
    }

    /// Returns the row count reported by the underlying index.
    pub fn row_count(&self) -> usize {
        self.index().row_count()
    }

    /// Returns row `n`, or `None` when a fresh cursor's `goto_row(n)` reports
    /// failure.
    pub fn row(&self, n: usize) -> Option<DsvRow<'_>> {
        let mut positioned = self.cursor();
        if !positioned.goto_row(n) {
            return None;
        }
        Some(DsvRow::from_cursor(positioned))
    }

    /// Borrows the stored copy of the parsed bytes.
    pub fn text(&self) -> &[u8] {
        self.text.as_slice()
    }

    /// Borrows the index built at parse time.
    pub fn index(&self) -> &DsvIndex {
        &self.index
    }
}
/// Borrowed DSV view: references to text bytes and to a [`DsvIndex`], both
/// living for `'a`, instead of owning them.
///
/// The type is `Copy`, so it can be passed around by value.
#[derive(Clone, Copy, Debug)]
pub struct DsvRef<'a> {
/// Borrowed bytes the view reads from.
text: &'a [u8],
/// Borrowed index used together with `text`.
index: &'a DsvIndex,
}
impl<'a> DsvRef<'a> {
pub fn new(text: &'a [u8], index: &'a DsvIndex) -> Self {
Self { text, index }
}
pub fn cursor(&self) -> DsvCursor<'a> {
DsvCursor::new(self.text, self.index)
}
pub fn rows(&self) -> DsvRows<'a> {
DsvRows::new(self.text, self.index)
}
pub fn row_count(&self) -> usize {
self.index.row_count()
}
pub fn row(&self, n: usize) -> Option<DsvRow<'a>> {
let mut cursor = self.cursor();
if cursor.goto_row(n) {
Some(DsvRow::from_cursor(cursor))
} else {
None
}
}
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Two newline-terminated records produce two rows with the expected fields.
    #[test]
    fn test_simple_csv() {
        let parsed = Dsv::parse(b"a,b,c\n1,2,3\n");
        assert_eq!(parsed.row_count(), 2);

        let header: Vec<_> = parsed.row(0).unwrap().fields().collect();
        assert_eq!(header, vec![&b"a"[..], &b"b"[..], &b"c"[..]]);

        let values: Vec<_> = parsed.row(1).unwrap().fields().collect();
        assert_eq!(values, vec![&b"1"[..], &b"2"[..], &b"3"[..]]);
    }

    /// A delimiter inside quotes does not split the field; the quotes stay in
    /// the returned bytes.
    #[test]
    fn test_quoted_fields() {
        let parsed = Dsv::parse(b"\"hello, world\",b\n");
        let fields: Vec<_> = parsed.row(0).unwrap().fields().collect();
        assert_eq!(fields.len(), 2);
        assert_eq!(fields[0], b"\"hello, world\"");
        assert_eq!(fields[1], b"b");
    }

    /// A newline inside quotes does not start a new row.
    #[test]
    fn test_quoted_newlines() {
        let parsed = Dsv::parse(b"\"line1\nline2\",b\n");
        assert_eq!(parsed.row_count(), 1);
    }

    /// Empty input has no rows.
    #[test]
    fn test_empty_csv() {
        let parsed = Dsv::parse(b"");
        assert_eq!(parsed.row_count(), 0);
    }

    /// A line without delimiters is one row holding one field.
    #[test]
    fn test_single_field() {
        let parsed = Dsv::parse(b"hello\n");
        assert_eq!(parsed.row_count(), 1);
        let fields: Vec<_> = parsed.row(0).unwrap().fields().collect();
        assert_eq!(fields, vec![&b"hello"[..]]);
    }

    /// With the TSV configuration, fields are split on tabs.
    #[test]
    fn test_tsv() {
        let tsv_config = DsvConfig::tsv();
        let parsed = Dsv::parse_with_config(b"a\tb\tc\n1\t2\t3\n", &tsv_config);
        let header: Vec<_> = parsed.row(0).unwrap().fields().collect();
        assert_eq!(header, vec![&b"a"[..], &b"b"[..], &b"c"[..]]);
    }

    /// `rows()` yields every row in order.
    #[test]
    fn test_row_iteration() {
        let parsed = Dsv::parse(b"a,b\n1,2\n3,4\n");
        let table: Vec<Vec<_>> = parsed
            .rows()
            .map(|record| record.fields().collect())
            .collect();
        assert_eq!(table.len(), 3);
        assert_eq!(table[0], vec![&b"a"[..], &b"b"[..]]);
        assert_eq!(table[1], vec![&b"1"[..], &b"2"[..]]);
        assert_eq!(table[2], vec![&b"3"[..], &b"4"[..]]);
    }

    /// `get` returns a field by column index and `None` past the last column.
    #[test]
    fn test_get_column() {
        let parsed = Dsv::parse(b"name,age,city\nAlice,30,NYC\n");
        let record = parsed.row(1).unwrap();
        let expected = [&b"Alice"[..], &b"30"[..], &b"NYC"[..]];
        for (column, want) in expected.iter().enumerate() {
            assert_eq!(record.get(column), Some(*want));
        }
        assert_eq!(record.get(expected.len()), None);
    }
}