use crate::get_path;
use crate::read::get_column;
use crate::Array;
use parquet2::error::Result;
/// Returns the reference (expected) decoded values for `column` in the
/// LZ4-compressed parquet test files.
///
/// Supported columns:
/// - `"c0"`: four non-null `Int64` values (unix-timestamp-like integers)
/// - `"c1"`: four non-null `Binary` byte strings
/// - `"v11"`: four non-null `Double` values
///
/// # Panics
/// Panics if `column` is not one of the three names above — keep this in
/// sync with the column list in `test_lz4_inference`.
fn verify_column_data(column: &str) -> Array {
    match column {
        "c0" => {
            let expected = vec![1593604800, 1593604800, 1593604801, 1593604801];
            let expected = expected.into_iter().map(Some).collect::<Vec<_>>();
            Array::Int64(expected)
        }
        "c1" => {
            let expected = vec!["abc", "def", "abc", "def"];
            let expected = expected
                .into_iter()
                .map(|v| Some(v.as_bytes().to_vec()))
                .collect::<Vec<_>>();
            Array::Binary(expected)
        }
        "v11" => {
            let expected = vec![42_f64, 7.7, 42.125, 7.7];
            let expected = expected.into_iter().map(Some).collect::<Vec<_>>();
            Array::Double(expected)
        }
        // Name the offending column so a mismatch with the test's column
        // list fails with a useful message instead of a bare panic.
        _ => unreachable!("no expected data defined for column {:?}", column),
    }
}
/// Both LZ4 framings (Hadoop and non-Hadoop) must be inferred and
/// decompressed correctly: every listed column of each file is compared
/// against the reference data from `verify_column_data`.
#[test]
fn test_lz4_inference() -> Result<()> {
    // The two files hold the same data but use different LZ4 framings.
    for file in [
        "hadoop_lz4_compressed.parquet",
        "non_hadoop_lz4_compressed.parquet",
    ] {
        let mut file_path = get_path();
        file_path.push(file);
        let file_path = file_path.to_str().unwrap();
        for column in ["c0", "c1", "v11"] {
            let (result, _statistics) = get_column(file_path, column)?;
            assert_eq!(result, verify_column_data(column), "of file {}", file);
        }
    }
    Ok(())
}
/// A larger Hadoop-framed LZ4 file must decode to all 10000 rows of
/// column "a".
#[test]
fn test_lz4_large_file() -> Result<()> {
    let mut path = get_path();
    path.push("hadoop_lz4_compressed_larger.parquet");
    let (result, _statistics) = get_column(path.to_str().unwrap(), "a")?;
    assert_eq!(result.len(), 10000);
    Ok(())
}