use std::collections::HashMap;
use std::path::Path;
use crate::error::{Result, RurlError};
/// One record of a data file: a map from column name to that record's value,
/// with every value held as a `String`.
pub type DataRow = HashMap<String, String>;
/// Tabular data loaded from a CSV or JSON file.
///
/// Instances are built by [`DataFile::from_path`]; the parsed contents are read
/// back through [`DataFile::rows`] and [`DataFile::columns`].
#[derive(Debug)]
pub struct DataFile {
/// Parsed records, in the order they were read from the input.
rows: Vec<DataRow>,
/// Column names: the header fields for CSV input, the keys of the first
/// record for JSON input.
columns: Vec<String>,
}
impl DataFile {
pub fn from_path(path: &Path) -> Result<Self> {
let ext = path
.extension()
.and_then(|e| e.to_str())
.map(|e| e.to_lowercase())
.unwrap_or_default();
let content = std::fs::read_to_string(path)?;
match ext.as_str() {
"csv" => Self::parse_csv(&content),
"json" => Self::parse_json(&content),
other => Err(RurlError::DataFileError(format!(
"Unsupported file extension: {}",
other
))),
}
}
pub fn rows(&self) -> &[DataRow] {
&self.rows
}
pub fn columns(&self) -> &[String] {
&self.columns
}
pub fn len(&self) -> usize {
self.rows.len()
}
pub fn is_empty(&self) -> bool {
self.rows.is_empty()
}
fn parse_csv(content: &str) -> Result<Self> {
let mut reader = csv::ReaderBuilder::new()
.has_headers(true)
.from_reader(content.as_bytes());
let headers: Vec<String> = reader
.headers()
.map_err(|e| RurlError::DataFileError(format!("CSV header error: {}", e)))?
.iter()
.map(|h| h.to_string())
.collect();
if headers.is_empty() {
return Err(RurlError::DataFileError("CSV has no headers".to_string()));
}
let mut rows = Vec::new();
for result in reader.records() {
let record = result
.map_err(|e| RurlError::DataFileError(format!("CSV parse error: {}", e)))?;
let row: DataRow = headers
.iter()
.zip(record.iter())
.map(|(k, v)| (k.clone(), v.to_string()))
.collect();
rows.push(row);
}
if rows.is_empty() {
return Err(RurlError::DataFileError(
"CSV file has headers but no data rows".to_string(),
));
}
Ok(Self {
rows,
columns: headers,
})
}
fn parse_json(content: &str) -> Result<Self> {
if let Ok(array) = serde_json::from_str::<Vec<HashMap<String, serde_json::Value>>>(content)
{
return Self::from_value_rows(array);
}
let mut value_rows: Vec<HashMap<String, serde_json::Value>> = Vec::new();
for line in content.lines() {
let line = line.trim();
if line.is_empty() {
continue;
}
let obj: HashMap<String, serde_json::Value> = serde_json::from_str(line)
.map_err(|e| RurlError::DataFileError(format!("JSON parse error: {}", e)))?;
value_rows.push(obj);
}
Self::from_value_rows(value_rows)
}
fn from_value_rows(value_rows: Vec<HashMap<String, serde_json::Value>>) -> Result<Self> {
if value_rows.is_empty() {
return Err(RurlError::DataFileError("JSON data is empty".to_string()));
}
let columns: Vec<String> = value_rows[0].keys().cloned().collect();
let rows: Vec<DataRow> = value_rows
.into_iter()
.map(|obj| {
obj.into_iter()
.map(|(k, v)| {
let s = match &v {
serde_json::Value::String(s) => s.clone(),
other => other.to_string(),
};
(k, s)
})
.collect()
})
.collect();
Ok(Self { rows, columns })
}
}
#[cfg(test)]
mod tests {
    use super::*;
    use std::path::{Path, PathBuf};

    /// Returns the value stored under `key` in row `row` of `df`, if any.
    fn cell<'a>(df: &'a DataFile, row: usize, key: &str) -> Option<&'a str> {
        df.rows()[row].get(key).map(String::as_str)
    }

    /// Collects the value of `key` from every row that has it.
    fn column_values<'a>(df: &'a DataFile, key: &str) -> Vec<&'a str> {
        df.rows()
            .iter()
            .filter_map(|r| r.get(key).map(String::as_str))
            .collect()
    }

    /// Writes `contents` to a file in the system temp directory and returns its
    /// path. The process id is part of the file name so that concurrent test
    /// processes do not overwrite each other's fixtures; `name` (and therefore
    /// its extension) stays at the end of the file name.
    fn write_temp(name: &str, contents: &str) -> PathBuf {
        let path = std::env::temp_dir().join(format!("{}_{}", std::process::id(), name));
        std::fs::write(&path, contents).expect("create temp data file");
        path
    }

    /// Runs `DataFile::from_path` and removes the fixture *before* the result
    /// is inspected, so a failing assertion never leaves the file behind.
    fn load_and_remove(path: &Path) -> Result<DataFile> {
        let parsed = DataFile::from_path(path);
        let _ = std::fs::remove_file(path);
        parsed
    }

    #[test]
    fn test_csv_basic() {
        let df = DataFile::parse_csv("name,age,city\nAlice,30,NYC\nBob,25,LA").unwrap();
        assert_eq!(df.len(), 2);
        assert_eq!(cell(&df, 0, "name"), Some("Alice"));
        assert_eq!(cell(&df, 0, "age"), Some("30"));
        assert_eq!(cell(&df, 1, "city"), Some("LA"));
    }

    #[test]
    fn test_csv_columns_extraction() {
        let df = DataFile::parse_csv("id,username,email\n1,alice,alice@example.com").unwrap();
        let cols = df.columns();
        for expected in &["id", "username", "email"] {
            assert!(cols.contains(&(*expected).to_string()));
        }
        assert_eq!(cols.len(), 3);
    }

    #[test]
    fn test_csv_empty_content_error() {
        let result = DataFile::parse_csv("");
        assert!(result.is_err());
        let msg = result.unwrap_err().to_string();
        assert!(msg.contains("Data file error"), "unexpected: {}", msg);
    }

    #[test]
    fn test_csv_headers_no_data_rows_error() {
        let result = DataFile::parse_csv("name,age,city\n");
        assert!(result.is_err());
        let msg = result.unwrap_err().to_string();
        assert!(msg.contains("no data rows"), "unexpected: {}", msg);
    }

    #[test]
    fn test_json_array_basic() {
        let json = r#"[{"name":"Alice","score":"100"},{"name":"Bob","score":"90"}]"#;
        let df = DataFile::parse_json(json).unwrap();
        assert_eq!(df.len(), 2);
        let names = column_values(&df, "name");
        assert!(names.contains(&"Alice"));
        assert!(names.contains(&"Bob"));
    }

    #[test]
    fn test_json_number_coercion() {
        let df = DataFile::parse_json(r#"[{"id": 42, "value": 3.14}]"#).unwrap();
        assert_eq!(cell(&df, 0, "id"), Some("42"));
        assert_eq!(cell(&df, 0, "value"), Some("3.14"));
    }

    #[test]
    fn test_json_boolean_coercion() {
        let df = DataFile::parse_json(r#"[{"active": true, "deleted": false}]"#).unwrap();
        assert_eq!(cell(&df, 0, "active"), Some("true"));
        assert_eq!(cell(&df, 0, "deleted"), Some("false"));
    }

    #[test]
    fn test_json_empty_array_error() {
        assert!(DataFile::parse_json("[]").is_err());
    }

    #[test]
    fn test_json_columns_extraction() {
        let df = DataFile::parse_json(r#"[{"x": "1", "y": "2", "z": "3"}]"#).unwrap();
        let cols = df.columns();
        assert_eq!(cols.len(), 3);
        for expected in &["x", "y", "z"] {
            assert!(cols.contains(&(*expected).to_string()));
        }
    }

    #[test]
    fn test_csv_quoted_fields() {
        let csv = "name,desc\n\"Alice\",\"says \"\"hello\"\"\"\n\"Bob\",\"a, b, c\"";
        let df = DataFile::parse_csv(csv).unwrap();
        assert_eq!(df.len(), 2);
        assert_eq!(cell(&df, 0, "name"), Some("Alice"));
        assert_eq!(cell(&df, 0, "desc"), Some("says \"hello\""));
        assert_eq!(cell(&df, 1, "desc"), Some("a, b, c"));
    }

    #[test]
    fn test_json_ndjson() {
        let df = DataFile::parse_json("{\"id\":\"1\"}\n{\"id\":\"2\"}").unwrap();
        assert_eq!(df.len(), 2);
        let ids = column_values(&df, "id");
        assert!(ids.contains(&"1"));
        assert!(ids.contains(&"2"));
    }

    #[test]
    fn test_json_mixed_coercion() {
        let df = DataFile::parse_json(r#"[{"id": 1, "active": true, "score": 9.5}]"#).unwrap();
        assert_eq!(cell(&df, 0, "id"), Some("1"));
        assert_eq!(cell(&df, 0, "active"), Some("true"));
        assert_eq!(cell(&df, 0, "score"), Some("9.5"));
    }

    #[test]
    fn test_from_path_csv() {
        let path = write_temp("hurley_test_route.csv", "x,y\n1,2\n3,4");
        let df = load_and_remove(&path).expect("parse csv via from_path");
        assert_eq!(df.len(), 2);
        assert_eq!(cell(&df, 0, "x"), Some("1"));
    }

    #[test]
    fn test_from_path_json() {
        let path = write_temp("hurley_test_route.json", r#"[{"k":"v1"},{"k":"v2"}]"#);
        let df = load_and_remove(&path).expect("parse json via from_path");
        assert_eq!(df.len(), 2);
        let vals = column_values(&df, "k");
        assert!(vals.contains(&"v1"));
        assert!(vals.contains(&"v2"));
    }

    #[test]
    fn test_extension_case_insensitive() {
        let path = write_temp("hurley_test_case.CSV", "col\nval");
        let df = load_and_remove(&path).expect("parse .CSV (uppercase) via from_path");
        assert_eq!(df.len(), 1);
        assert_eq!(cell(&df, 0, "col"), Some("val"));
    }

    #[test]
    fn test_unsupported_extension_error() {
        let path = write_temp("hurley_test_unsupported.txt", "hello");
        let result = load_and_remove(&path);
        assert!(result.is_err());
        let msg = result.unwrap_err().to_string();
        assert!(
            msg.contains("Unsupported file extension"),
            "unexpected: {}",
            msg
        );
    }
}