use ax_core::{AxError, RecordSet};
pub mod infer;
pub mod parser;
pub mod parsers;
pub mod table;
pub use parser::{Confidence, FormatParser, ParserRegistry};
/// Normalizes `bytes` into a [`RecordSet`] using a default [`ParserRegistry`].
///
/// `source` is a label for the input that is forwarded to the registry
/// unchanged; the tests in this crate pass a file name such as `"t.csv"` or
/// `"-"`. How the registry combines the label and the content to choose a
/// parser is decided inside [`ParserRegistry::normalize`].
///
/// # Errors
///
/// Returns whatever [`AxError`] the registry reports. The tests in this
/// crate show [`AxError::UnknownFormat`] for bytes no parser recognizes.
pub fn normalize(source: &str, bytes: &[u8]) -> Result<RecordSet, AxError> {
    // A fresh default registry is built for every call.
    let registry = ParserRegistry::default();
    registry.normalize(source, bytes)
}
/// Normalizes `bytes` into a [`RecordSet`] with the parser named by `id`,
/// looked up in a default [`ParserRegistry`].
///
/// `id` is the parser identifier (the tests in this crate use `"csv"`), and
/// `source` is a label for the input. All three arguments are forwarded
/// unchanged to [`ParserRegistry::normalize_with`].
///
/// # Errors
///
/// Returns whatever [`AxError`] the registry reports. The tests in this
/// crate show an error for an `id` that is not registered (`"nonesuch"`).
pub fn normalize_with(id: &str, source: &str, bytes: &[u8]) -> Result<RecordSet, AxError> {
    // A fresh default registry is built for every call.
    let registry = ParserRegistry::default();
    registry.normalize_with(id, source, bytes)
}
#[cfg(test)]
mod tests {
    //! End-to-end checks that drive the crate-level `normalize` and
    //! `normalize_with` entry points through the default registry.

    use super::*;
    use ax_core::{ColType, Value};

    /// CSV named by extension: header plus three rows, one empty cell in `b`.
    #[test]
    fn csv_end_to_end() {
        let input: &[u8] = b"a,b\n1,x\n2,\n3,z";
        let records = normalize("t.csv", input).unwrap();

        assert_eq!(records.format, "csv");
        assert_eq!(records.width(), 2);
        assert_eq!(records.rows(), 3);

        let col_a = records.column("a").unwrap();
        assert_eq!(col_a.ty, ColType::Int);

        // The second data row leaves `b` empty, which counts as one null.
        let col_b = records.column("b").unwrap();
        assert_eq!(col_b.null_count(), 1);
    }

    /// NDJSON from an unnamed source; the first object has no `b` key.
    #[test]
    fn ndjson_end_to_end() {
        let input: &[u8] = b"{\"a\":1}\n{\"a\":2,\"b\":9}\n";
        let records = normalize("-", input).unwrap();

        assert_eq!(records.format, "ndjson");
        assert_eq!(records.rows(), 2);

        let col_b = records.column("b").unwrap();
        assert_eq!(col_b.null_count(), 1);
    }

    /// A JSON array of three objects becomes three rows with an integer column.
    #[test]
    fn json_end_to_end() {
        let input: &[u8] = br#"[{"x":10},{"x":20},{"x":30}]"#;
        let records = normalize("d.json", input).unwrap();

        assert_eq!(records.format, "json");
        assert_eq!(records.rows(), 3);

        let col_x = records.column("x").unwrap();
        assert_eq!(col_x.ty, ColType::Int);
    }

    /// With no file extension to go on, tab-separated content is reported as TSV.
    #[test]
    fn tsv_sniffed_from_content() {
        let input: &[u8] = b"a\tb\n1\t2\n3\t4";
        let records = normalize("-", input).unwrap();

        assert_eq!(records.format, "tsv");
        assert_eq!(records.width(), 2);
    }

    /// Rows shorter or longer than the header still produce one record each.
    #[test]
    fn ragged_csv_pads_and_truncates() {
        // First data row is one cell short, second is one cell long.
        let input: &[u8] = b"a,b\n1\n2,3,4";
        let records = normalize("t.csv", input).unwrap();

        assert_eq!(records.rows(), 2);

        // Only the padding half is asserted here: the short row's missing `b`
        // cell must be null.
        let col_b = records.column("b").unwrap();
        assert_eq!(col_b.cells[0], Value::Null);
    }

    /// Bytes that no parser recognizes are rejected as an unknown format.
    #[test]
    fn unknown_format_errors() {
        let junk = [0x00, 0x01, 0x02, 0xff];
        let outcome = normalize("-", &junk);

        assert!(matches!(outcome, Err(AxError::UnknownFormat(_))));
    }

    /// An explicit parser id is honoured, and an unregistered id is an error.
    #[test]
    fn normalize_with_explicit_id() {
        let records = normalize_with("csv", "x", b"a,b\n1,2").unwrap();
        assert_eq!(records.format, "csv");

        let unknown_id = normalize_with("nonesuch", "x", b"a,b");
        assert!(unknown_id.is_err());
    }

    /// Parquet bytes written by polars are handled by `normalize`
    /// (only built with the `polars` feature).
    #[cfg(feature = "polars")]
    #[test]
    fn parquet_routes_through_the_registry() {
        use polars::prelude::*;

        // Build a 3x2 frame and serialize it to an in-memory Parquet buffer.
        let mut frame = df!["a" => [1i64, 2, 3], "b" => [4i64, 5, 6]].unwrap();
        let mut encoded = Vec::new();
        ParquetWriter::new(&mut encoded).finish(&mut frame).unwrap();

        let records = normalize("t.parquet", &encoded).unwrap();

        assert_eq!(records.format, "parquet");
        assert_eq!(records.width(), 2);
        assert_eq!(records.rows(), 3);
    }
}