use crate::error::{Error, Result};
use crate::fs::{try_read_resource_dir, validate_resource_name, write_atomic};
use crate::resource::{Catalog, CatalogFieldType, CatalogItemRow, CatalogItems};
use std::collections::{BTreeSet, HashMap};
use std::path::Path;
/// Comment line that `save_schema` writes at the top of every generated schema file.
const SCHEMA_HEADER: &str =
"# Generated by braze-sync. Edit and run `braze-sync apply` to sync to Braze.\n";
/// File name of the schema document looked up inside each catalog directory.
const SCHEMA_FILE_NAME: &str = "schema.yaml";
/// Loads the schema of every catalog directory found directly under `catalogs_root`.
///
/// Entries that are not directories, and directories without a `schema.yaml`, are
/// skipped. The result is ordered by catalog name. When `try_read_resource_dir`
/// reports that there is nothing to read, an empty list is returned.
///
/// # Errors
///
/// Propagates directory-iteration and schema-parsing failures, and returns
/// `Error::InvalidFormat` when the `name` inside a schema differs from the name of
/// the directory that contains it.
pub fn load_all_schemas(catalogs_root: &Path) -> Result<Vec<Catalog>> {
    let read_dir = match try_read_resource_dir(catalogs_root, "catalogs")? {
        Some(listing) => listing,
        None => return Ok(Vec::new()),
    };

    let mut catalogs: Vec<Catalog> = Vec::new();
    for dir_entry in read_dir {
        let dir_entry = dir_entry?;
        if !dir_entry.file_type()?.is_dir() {
            tracing::debug!(path = %dir_entry.path().display(), "skipping non-directory entry");
            continue;
        }

        let schema_path = dir_entry.path().join(SCHEMA_FILE_NAME);
        if !schema_path.is_file() {
            continue;
        }

        let catalog = read_schema_file(&schema_path)?;
        // The directory name is the source of truth for the catalog's identity.
        let expected_name = dir_entry.file_name().to_string_lossy().into_owned();
        if catalog.name == expected_name {
            catalogs.push(catalog);
        } else {
            return Err(Error::InvalidFormat {
                path: schema_path,
                message: format!(
                    "schema name '{}' does not match its catalog directory '{}'",
                    catalog.name, expected_name
                ),
            });
        }
    }

    catalogs.sort_by(|left, right| left.name.cmp(&right.name));
    Ok(catalogs)
}
pub fn read_schema_file(path: &Path) -> Result<Catalog> {
let bytes = std::fs::read_to_string(path)?;
let cat: Catalog = serde_norway::from_str(&bytes).map_err(|source| Error::YamlParse {
path: path.to_path_buf(),
source,
})?;
Ok(cat)
}
pub fn save_schema(catalogs_root: &Path, catalog: &Catalog) -> Result<()> {
validate_resource_name("catalog", &catalog.name)?;
let dir = catalogs_root.join(&catalog.name);
let path = dir.join(SCHEMA_FILE_NAME);
let normalized = catalog.normalized();
let yaml = serde_norway::to_string(&normalized).map_err(|e| Error::InvalidFormat {
path: path.clone(),
message: format!("yaml serialization failed: {e}"),
})?;
let mut content = String::with_capacity(SCHEMA_HEADER.len() + yaml.len());
content.push_str(SCHEMA_HEADER);
content.push_str(&yaml);
write_atomic(&path, content.as_bytes())?;
Ok(())
}
/// File name of the item table looked up inside each catalog directory.
pub(crate) const ITEMS_FILE_NAME: &str = "items.csv";
/// Header name of the column that holds each item's identifier.
const ITEMS_ID_COLUMN: &str = "id";
/// Error message used when an items file has no `id` header column.
const MISSING_ID_COLUMN_MSG: &str = "items.csv is missing required 'id' column";
fn catalog_name_from_items_path(path: &Path) -> Result<String> {
path.parent()
.and_then(|p| p.file_name())
.and_then(|n| n.to_str())
.ok_or_else(|| Error::InvalidFormat {
path: path.to_path_buf(),
message: "cannot determine catalog name from items.csv path".into(),
})
.map(String::from)
}
/// Best-effort lookup of the field types declared in the `schema.yaml` that sits next
/// to `items_path`.
///
/// Returns `None` when `items_path` has no parent or when the schema cannot be read or
/// parsed; the failure itself is intentionally discarded. Otherwise returns a map from
/// field name to declared type.
fn load_sibling_field_types(items_path: &Path) -> Option<HashMap<String, CatalogFieldType>> {
    let catalog_dir = items_path.parent()?;
    let Ok(catalog) = read_schema_file(&catalog_dir.join(SCHEMA_FILE_NAME)) else {
        return None;
    };

    let mut types_by_name = HashMap::new();
    for field in catalog.fields {
        types_by_name.insert(field.name, field.field_type);
    }
    Some(types_by_name)
}
fn parse_csv_cell_typed(s: &str, field_type: Option<CatalogFieldType>) -> serde_json::Value {
match field_type {
Some(CatalogFieldType::String | CatalogFieldType::Time) => {
serde_json::Value::String(s.to_string())
}
Some(CatalogFieldType::Boolean) => match s {
"true" => serde_json::Value::Bool(true),
"false" => serde_json::Value::Bool(false),
_ => serde_json::Value::String(s.to_string()),
},
Some(CatalogFieldType::Number) => {
if let Ok(n) = s.parse::<i64>() {
return serde_json::Value::Number(n.into());
}
if let Ok(n) = s.parse::<f64>() {
if let Some(n) = serde_json::Number::from_f64(n) {
return serde_json::Value::Number(n);
}
}
serde_json::Value::String(s.to_string())
}
_ => parse_csv_cell(s),
}
}
fn parse_csv_cell(s: &str) -> serde_json::Value {
if let Ok(n) = s.parse::<i64>() {
return serde_json::Value::Number(n.into());
}
if let Ok(n) = s.parse::<f64>() {
if let Some(n) = serde_json::Number::from_f64(n) {
return serde_json::Value::Number(n);
}
}
match s {
"true" => return serde_json::Value::Bool(true),
"false" => return serde_json::Value::Bool(false),
_ => {}
}
serde_json::Value::String(s.to_string())
}
fn value_to_csv_cell(v: &serde_json::Value) -> String {
match v {
serde_json::Value::String(s) => s.clone(),
serde_json::Value::Number(n) => n.to_string(),
serde_json::Value::Bool(b) => b.to_string(),
serde_json::Value::Null => String::new(),
other => serde_json::to_string(other).unwrap_or_default(),
}
}
/// Loads the items CSV at `path`, keeping both the per-item hashes and the parsed rows.
///
/// # Errors
///
/// Returns whatever `load_items_inner` reports for this file.
pub fn load_items(path: &Path) -> Result<CatalogItems> {
    let materialize_rows = true;
    load_items_inner(path, materialize_rows)
}
/// Loads only the per-item hashes from the items CSV at `path`; `rows` is left as `None`.
///
/// # Errors
///
/// Returns whatever `load_items_inner` reports for this file.
pub fn load_item_hashes(path: &Path) -> Result<CatalogItems> {
    let materialize_rows = false;
    load_items_inner(path, materialize_rows)
}
fn load_items_inner(path: &Path, materialize_rows: bool) -> Result<CatalogItems> {
let catalog_name = catalog_name_from_items_path(path)?;
let field_types = load_sibling_field_types(path);
let mut reader = csv::Reader::from_path(path).map_err(|e| Error::CsvParse {
path: path.to_path_buf(),
source: e,
})?;
let headers = reader.headers().map_err(|e| Error::CsvParse {
path: path.to_path_buf(),
source: e,
})?;
let header_vec: Vec<String> = headers.iter().map(String::from).collect();
let id_col = header_vec
.iter()
.position(|h| h == ITEMS_ID_COLUMN)
.ok_or_else(|| Error::InvalidFormat {
path: path.to_path_buf(),
message: MISSING_ID_COLUMN_MSG.into(),
})?;
let mut sorted_cols: Vec<(usize, &str)> = header_vec
.iter()
.enumerate()
.filter(|(i, _)| *i != id_col)
.map(|(i, name)| (i, name.as_str()))
.collect();
sorted_cols.sort_by_key(|(_, name)| *name);
let mut rows = if materialize_rows {
Some(Vec::new())
} else {
None
};
let mut item_hashes = HashMap::new();
let mut json_buf: Vec<u8> = Vec::new();
for result in reader.records() {
let record = result.map_err(|e| Error::CsvParse {
path: path.to_path_buf(),
source: e,
})?;
let id = record.get(id_col).unwrap_or("").to_string();
if id.is_empty() {
return Err(Error::InvalidFormat {
path: path.to_path_buf(),
message: "items.csv contains a row with an empty 'id'".into(),
});
}
let hash = if materialize_rows {
let mut fields = serde_json::Map::new();
for (i, val) in record.iter().enumerate() {
if i == id_col {
continue;
}
if let Some(col_name) = header_vec.get(i) {
let ft = field_types
.as_ref()
.and_then(|ft| ft.get(col_name.as_str()))
.copied();
fields.insert(col_name.clone(), parse_csv_cell_typed(val, ft));
}
}
let h = CatalogItemRow::hash_fields(&fields);
if let Some(ref mut v) = rows {
v.push(CatalogItemRow {
id: id.clone(),
fields,
});
}
h
} else {
json_buf.clear();
json_buf.push(b'{');
for (j, &(col_idx, col_name)) in sorted_cols.iter().enumerate() {
if j > 0 {
json_buf.push(b',');
}
serde_json::to_writer(&mut json_buf, col_name)
.expect("string serialization is infallible");
json_buf.push(b':');
let val = record.get(col_idx).unwrap_or("");
let ft = field_types
.as_ref()
.and_then(|ft| ft.get(col_name))
.copied();
serde_json::to_writer(&mut json_buf, &parse_csv_cell_typed(val, ft))
.expect("value serialization is infallible");
}
json_buf.push(b'}');
blake3::hash(&json_buf).to_hex().to_string()
};
match item_hashes.entry(id) {
std::collections::hash_map::Entry::Occupied(e) => {
return Err(Error::InvalidFormat {
path: path.to_path_buf(),
message: format!("duplicate item id '{}' in items.csv", e.key()),
});
}
std::collections::hash_map::Entry::Vacant(e) => {
e.insert(hash);
}
}
}
Ok(CatalogItems {
catalog_name,
item_hashes,
rows,
})
}
pub fn read_item_csv_columns(path: &Path) -> Result<(String, Vec<String>)> {
let catalog_name = catalog_name_from_items_path(path)?;
let mut reader = csv::Reader::from_path(path).map_err(|e| Error::CsvParse {
path: path.to_path_buf(),
source: e,
})?;
let headers = reader.headers().map_err(|e| Error::CsvParse {
path: path.to_path_buf(),
source: e,
})?;
if !headers.iter().any(|h| h == ITEMS_ID_COLUMN) {
return Err(Error::InvalidFormat {
path: path.to_path_buf(),
message: MISSING_ID_COLUMN_MSG.into(),
});
}
let columns: Vec<String> = headers
.iter()
.filter(|h| *h != ITEMS_ID_COLUMN)
.map(String::from)
.collect();
Ok((catalog_name, columns))
}
pub fn save_items(catalogs_root: &Path, catalog_name: &str, rows: &[CatalogItemRow]) -> Result<()> {
validate_resource_name("catalog", catalog_name)?;
let mut field_names: BTreeSet<&str> = BTreeSet::new();
for row in rows {
for key in row.fields.keys() {
field_names.insert(key.as_str());
}
}
let mut header: Vec<&str> = Vec::with_capacity(1 + field_names.len());
header.push(ITEMS_ID_COLUMN);
header.extend(field_names.iter().copied());
let path = catalogs_root.join(catalog_name).join(ITEMS_FILE_NAME);
let mut wtr = csv::Writer::from_writer(Vec::new());
wtr.write_record(&header).map_err(|e| Error::CsvParse {
path: path.clone(),
source: e,
})?;
for row in rows {
let mut record: Vec<String> = Vec::with_capacity(header.len());
record.push(row.id.clone());
for &col in &header[1..] {
let val = row
.fields
.get(col)
.map(value_to_csv_cell)
.unwrap_or_default();
record.push(val);
}
wtr.write_record(&record).map_err(|e| Error::CsvParse {
path: path.clone(),
source: e,
})?;
}
let buf = wtr.into_inner().map_err(|e| Error::CsvParse {
path: path.clone(),
source: e.into_error().into(),
})?;
write_atomic(&path, &buf)?;
Ok(())
}
/// Loads `items.csv` from every catalog directory under `catalogs_root`, keeping the
/// parsed rows as well as the per-item hashes.
///
/// # Errors
///
/// Returns whatever `load_all_items_inner` reports.
pub fn load_all_items(catalogs_root: &Path) -> Result<Vec<CatalogItems>> {
    let materialize_rows = true;
    load_all_items_inner(catalogs_root, materialize_rows)
}
/// Loads only the per-item hashes from every catalog directory under `catalogs_root`.
///
/// # Errors
///
/// Returns whatever `load_all_items_inner` reports.
pub fn load_all_item_hashes(catalogs_root: &Path) -> Result<Vec<CatalogItems>> {
    let materialize_rows = false;
    load_all_items_inner(catalogs_root, materialize_rows)
}
/// Shared implementation behind `load_all_items` and `load_all_item_hashes`.
///
/// Visits each directory directly under `catalogs_root`, loads its `items.csv` when
/// one exists, and returns the results ordered by catalog name. Non-directory entries
/// and directories without an items file are skipped. When `try_read_resource_dir`
/// reports that there is nothing to read, an empty list is returned.
///
/// # Errors
///
/// Propagates directory-iteration failures and any error from `load_items_inner`.
fn load_all_items_inner(catalogs_root: &Path, materialize_rows: bool) -> Result<Vec<CatalogItems>> {
    let read_dir = match try_read_resource_dir(catalogs_root, "catalogs")? {
        Some(listing) => listing,
        None => return Ok(Vec::new()),
    };

    let mut collected = Vec::new();
    for dir_entry in read_dir {
        let dir_entry = dir_entry?;
        if dir_entry.file_type()?.is_dir() {
            let items_path = dir_entry.path().join(ITEMS_FILE_NAME);
            if items_path.is_file() {
                collected.push(load_items_inner(&items_path, materialize_rows)?);
            }
        }
    }

    collected.sort_by(|left, right| left.catalog_name.cmp(&right.catalog_name));
    Ok(collected)
}
// Unit tests for schema and items persistence. Every test works inside its own
// temporary directory created with `tempfile::tempdir()`.
#[cfg(test)]
mod tests {
use super::*;
use crate::resource::{CatalogField, CatalogFieldType};
// Builds a `CatalogField` with the given name and type.
fn field(name: &str, t: CatalogFieldType) -> CatalogField {
CatalogField {
name: name.into(),
field_type: t,
}
}
// Builds a `Catalog` whose description is "<name> catalog".
fn cat(name: &str, fields: Vec<CatalogField>) -> Catalog {
Catalog {
name: name.into(),
description: Some(format!("{name} catalog")),
fields,
}
}
// Saving then loading one catalog yields its normalized form.
#[test]
fn round_trip_single_catalog() {
let dir = tempfile::tempdir().unwrap();
let c = cat(
"cardiology",
vec![
field("condition_id", CatalogFieldType::String),
field("display_order", CatalogFieldType::Number),
field("is_active", CatalogFieldType::Boolean),
],
);
save_schema(dir.path(), &c).unwrap();
let loaded = load_all_schemas(dir.path()).unwrap();
assert_eq!(loaded.len(), 1);
assert_eq!(loaded[0], c.normalized());
}
// Exercises String, Number, Boolean, Time, Object and Array field types in one schema.
// NOTE(review): compares against `c` rather than `c.normalized()`; presumably fine
// because the fields are already listed in alphabetical order — confirm.
#[test]
fn round_trip_all_field_types() {
let dir = tempfile::tempdir().unwrap();
let c = cat(
"everything",
vec![
field("a_string", CatalogFieldType::String),
field("b_number", CatalogFieldType::Number),
field("c_boolean", CatalogFieldType::Boolean),
field("d_time", CatalogFieldType::Time),
field("e_object", CatalogFieldType::Object),
field("f_array", CatalogFieldType::Array),
],
);
save_schema(dir.path(), &c).unwrap();
let loaded = load_all_schemas(dir.path()).unwrap();
assert_eq!(loaded[0], c);
}
// Fields given as z, a, m come back as a, m, z after a save/load cycle.
#[test]
fn save_sorts_fields_alphabetically() {
let dir = tempfile::tempdir().unwrap();
let c = Catalog {
name: "x".into(),
description: None,
fields: vec![
field("z", CatalogFieldType::String),
field("a", CatalogFieldType::String),
field("m", CatalogFieldType::String),
],
};
save_schema(dir.path(), &c).unwrap();
let loaded = load_all_schemas(dir.path()).unwrap();
assert_eq!(loaded[0].fields[0].name, "a");
assert_eq!(loaded[0].fields[1].name, "m");
assert_eq!(loaded[0].fields[2].name, "z");
}
// Catalogs saved as zebra, apple, mango are returned sorted by name.
#[test]
fn load_multiple_catalogs_sorted_alphabetically() {
let dir = tempfile::tempdir().unwrap();
save_schema(
dir.path(),
&cat("zebra", vec![field("id", CatalogFieldType::String)]),
)
.unwrap();
save_schema(
dir.path(),
&cat("apple", vec![field("id", CatalogFieldType::String)]),
)
.unwrap();
save_schema(
dir.path(),
&cat("mango", vec![field("id", CatalogFieldType::String)]),
)
.unwrap();
let loaded = load_all_schemas(dir.path()).unwrap();
assert_eq!(
loaded.iter().map(|c| c.name.as_str()).collect::<Vec<_>>(),
vec!["apple", "mango", "zebra"]
);
}
// A root directory that does not exist loads as an empty list, not an error.
#[test]
fn missing_catalogs_root_returns_empty() {
let dir = tempfile::tempdir().unwrap();
let nonexistent = dir.path().join("not_here");
let loaded = load_all_schemas(&nonexistent).unwrap();
assert!(loaded.is_empty());
}
// An existing but empty root directory loads as an empty list.
#[test]
fn empty_catalogs_root_returns_empty() {
let dir = tempfile::tempdir().unwrap();
let loaded = load_all_schemas(dir.path()).unwrap();
assert!(loaded.is_empty());
}
// Passing a regular file as the root is reported as `InvalidFormat`.
#[test]
fn catalogs_root_pointing_at_a_file_is_rejected() {
let dir = tempfile::tempdir().unwrap();
let file_path = dir.path().join("not_a_dir");
std::fs::write(&file_path, "x").unwrap();
let err = load_all_schemas(&file_path).unwrap_err();
assert!(matches!(err, Error::InvalidFormat { .. }), "got: {err:?}");
}
// A directory holding only items.csv is ignored by the schema loader.
#[test]
fn dir_without_schema_yaml_is_silently_skipped() {
let dir = tempfile::tempdir().unwrap();
let lone = dir.path().join("lonely");
std::fs::create_dir_all(&lone).unwrap();
std::fs::write(lone.join("items.csv"), "id\n").unwrap();
save_schema(
dir.path(),
&cat("real", vec![field("id", CatalogFieldType::String)]),
)
.unwrap();
let loaded = load_all_schemas(dir.path()).unwrap();
assert_eq!(loaded.len(), 1);
assert_eq!(loaded[0].name, "real");
}
// The error raised for a name/directory mismatch mentions both names.
#[test]
fn schema_name_mismatch_with_dir_name_is_an_error() {
let dir = tempfile::tempdir().unwrap();
let cat_dir = dir.path().join("on_disk_name");
std::fs::create_dir_all(&cat_dir).unwrap();
std::fs::write(
cat_dir.join("schema.yaml"),
"name: in_yaml_name\nfields: []\n",
)
.unwrap();
let err = load_all_schemas(dir.path()).unwrap_err();
match err {
Error::InvalidFormat { message, .. } => {
assert!(message.contains("on_disk_name"));
assert!(message.contains("in_yaml_name"));
}
other => panic!("expected InvalidFormat, got {other:?}"),
}
}
// A schema containing a key the loader does not know still loads.
#[test]
fn unknown_field_in_schema_yaml_is_ignored_for_forward_compat() {
let dir = tempfile::tempdir().unwrap();
let cat_dir = dir.path().join("future");
std::fs::create_dir_all(&cat_dir).unwrap();
let yaml = "\
name: future
description: hi
future_v1_3_field: surprise
fields:
- name: id
type: string
";
std::fs::write(cat_dir.join("schema.yaml"), yaml).unwrap();
let loaded = load_all_schemas(dir.path()).unwrap();
assert_eq!(loaded.len(), 1);
assert_eq!(loaded[0].name, "future");
assert_eq!(loaded[0].fields.len(), 1);
}
// A leading `#` comment line (as written by `save_schema`) does not break parsing.
#[test]
fn schema_yaml_with_header_comment_parses() {
let dir = tempfile::tempdir().unwrap();
let cat_dir = dir.path().join("commented");
std::fs::create_dir_all(&cat_dir).unwrap();
let yaml = "\
# Generated by braze-sync. Edit and run `braze-sync apply` to sync to Braze.
name: commented
fields:
- name: id
type: string
";
std::fs::write(cat_dir.join("schema.yaml"), yaml).unwrap();
let loaded = load_all_schemas(dir.path()).unwrap();
assert_eq!(loaded.len(), 1);
assert_eq!(loaded[0].name, "commented");
}
// The saved file starts with the generated-file header comment.
#[test]
fn save_writes_header_comment() {
let dir = tempfile::tempdir().unwrap();
let c = cat("hdr", vec![field("id", CatalogFieldType::String)]);
save_schema(dir.path(), &c).unwrap();
let raw = std::fs::read_to_string(dir.path().join("hdr/schema.yaml")).unwrap();
assert!(
raw.starts_with("# Generated by braze-sync."),
"missing header in: {raw}"
);
}
// Saving under a root that does not exist yet creates the intermediate directories.
#[test]
fn save_creates_nested_directories() {
let dir = tempfile::tempdir().unwrap();
let nested = dir.path().join("braze").join("catalogs");
save_schema(
&nested,
&cat("derm", vec![field("id", CatalogFieldType::String)]),
)
.unwrap();
assert!(nested.join("derm").join("schema.yaml").is_file());
}
// Names that are empty, are `.`/`..`, or contain path separators are rejected.
#[test]
fn save_rejects_path_traversal_in_name() {
let dir = tempfile::tempdir().unwrap();
for bad in ["../evil", "..", ".", "", "a/b", "a\\b"] {
let c = Catalog {
name: bad.into(),
description: None,
fields: vec![],
};
let err = save_schema(dir.path(), &c).unwrap_err();
assert!(
matches!(err, Error::InvalidFormat { .. }),
"name {bad:?} should be rejected; got {err:?}"
);
}
}
// After a save the catalog directory contains exactly `schema.yaml`.
#[test]
fn atomic_save_leaves_no_temp_files() {
let dir = tempfile::tempdir().unwrap();
let c = cat("clean", vec![field("id", CatalogFieldType::String)]);
save_schema(dir.path(), &c).unwrap();
let cat_dir = dir.path().join("clean");
let entries: Vec<_> = std::fs::read_dir(&cat_dir)
.unwrap()
.map(|e| e.unwrap().file_name().to_string_lossy().into_owned())
.collect();
assert_eq!(entries, vec!["schema.yaml".to_string()]);
}
// A second save with the same name replaces the first schema.
#[test]
fn save_overwrites_existing_schema() {
let dir = tempfile::tempdir().unwrap();
let v1 = cat("ovr", vec![field("a", CatalogFieldType::String)]);
save_schema(dir.path(), &v1).unwrap();
let v2 = cat(
"ovr",
vec![
field("a", CatalogFieldType::String),
field("b", CatalogFieldType::Number),
],
);
save_schema(dir.path(), &v2).unwrap();
let loaded = load_all_schemas(dir.path()).unwrap();
assert_eq!(loaded.len(), 1);
assert_eq!(loaded[0].fields.len(), 2);
}
// Builds a `CatalogItemRow` from an id and a list of (field name, JSON value) pairs.
fn make_row(id: &str, fields: &[(&str, serde_json::Value)]) -> CatalogItemRow {
let mut map = serde_json::Map::new();
for (k, v) in fields {
map.insert((*k).to_string(), v.clone());
}
CatalogItemRow {
id: id.into(),
fields: map,
}
}
// Writes raw CSV text to `<dir>/<catalog_name>/items.csv`, creating the directory.
fn write_items_csv(dir: &std::path::Path, catalog_name: &str, csv_text: &str) {
let cat_dir = dir.join(catalog_name);
std::fs::create_dir_all(&cat_dir).unwrap();
std::fs::write(cat_dir.join("items.csv"), csv_text).unwrap();
}
// Saved rows load back with matching counts, and each stored hash equals
// the row's own `content_hash()`.
#[test]
fn items_round_trip() {
let dir = tempfile::tempdir().unwrap();
let rows = vec![
make_row(
"af001",
&[
("name", serde_json::json!("atrial")),
("order", serde_json::json!(1)),
],
),
make_row(
"af002",
&[
("name", serde_json::json!("ventricular")),
("order", serde_json::json!(2)),
],
),
];
save_items(dir.path(), "cardiology", &rows).unwrap();
let loaded = load_items(&dir.path().join("cardiology/items.csv")).unwrap();
assert_eq!(loaded.catalog_name, "cardiology");
assert_eq!(loaded.rows.as_ref().unwrap().len(), 2);
assert_eq!(loaded.item_hashes.len(), 2);
for row in loaded.rows.as_ref().unwrap() {
assert_eq!(
loaded.item_hashes[&row.id],
row.content_hash(),
"hash mismatch for id={}",
row.id,
);
}
}
// Without a schema, number / boolean / string values survive a save/load cycle.
#[test]
fn items_round_trip_preserves_types() {
let dir = tempfile::tempdir().unwrap();
let rows = vec![make_row(
"x",
&[
("num", serde_json::json!(42)),
("flag", serde_json::json!(true)),
("text", serde_json::json!("hello")),
],
)];
save_items(dir.path(), "typed", &rows).unwrap();
let loaded = load_items(&dir.path().join("typed/items.csv")).unwrap();
let r = &loaded.rows.as_ref().unwrap()[0];
assert_eq!(r.fields["num"], serde_json::json!(42));
assert_eq!(r.fields["flag"], serde_json::json!(true));
assert_eq!(r.fields["text"], serde_json::json!("hello"));
}
// A CSV whose header has no `id` column is rejected.
#[test]
fn items_missing_id_column_rejected() {
let dir = tempfile::tempdir().unwrap();
write_items_csv(dir.path(), "bad", "name,order\nfoo,1\n");
let err = load_items(&dir.path().join("bad/items.csv")).unwrap_err();
assert!(matches!(err, Error::InvalidFormat { .. }));
}
// Two rows sharing an id produce an `InvalidFormat` error mentioning "duplicate".
#[test]
fn items_duplicate_id_rejected() {
let dir = tempfile::tempdir().unwrap();
write_items_csv(dir.path(), "dup", "id,name\na,foo\na,bar\n");
let err = load_items(&dir.path().join("dup/items.csv")).unwrap_err();
match err {
Error::InvalidFormat { message, .. } => {
assert!(message.contains("duplicate"), "msg: {message}");
}
other => panic!("expected InvalidFormat, got {other:?}"),
}
}
// A row with an empty id cell is rejected.
#[test]
fn items_empty_id_rejected() {
let dir = tempfile::tempdir().unwrap();
write_items_csv(dir.path(), "empty", "id,name\n,foo\n");
let err = load_items(&dir.path().join("empty/items.csv")).unwrap_err();
assert!(matches!(err, Error::InvalidFormat { .. }));
}
// Saving zero rows writes a file that loads back as an empty item set.
#[test]
fn items_header_only_csv_round_trips() {
let dir = tempfile::tempdir().unwrap();
save_items(dir.path(), "empty_cat", &[]).unwrap();
let loaded = load_items(&dir.path().join("empty_cat/items.csv")).unwrap();
assert_eq!(loaded.catalog_name, "empty_cat");
assert!(loaded.rows.as_ref().unwrap().is_empty());
assert!(loaded.item_hashes.is_empty());
}
// The written header is `id` followed by the field names in sorted order.
#[test]
fn items_field_order_in_csv_is_deterministic() {
let dir = tempfile::tempdir().unwrap();
let rows = vec![make_row(
"x",
&[
("z_last", serde_json::json!("z")),
("a_first", serde_json::json!("a")),
("m_mid", serde_json::json!("m")),
],
)];
save_items(dir.path(), "ordered", &rows).unwrap();
let csv_text = std::fs::read_to_string(dir.path().join("ordered/items.csv")).unwrap();
let header_line = csv_text.lines().next().unwrap();
assert_eq!(header_line, "id,a_first,m_mid,z_last");
}
// Commas and double quotes inside a value survive a save/load cycle.
#[test]
fn items_special_chars_round_trip() {
let dir = tempfile::tempdir().unwrap();
let rows = vec![make_row(
"x",
&[("val", serde_json::json!("has, commas \"and\" quotes"))],
)];
save_items(dir.path(), "special", &rows).unwrap();
let loaded = load_items(&dir.path().join("special/items.csv")).unwrap();
let r = &loaded.rows.as_ref().unwrap()[0];
assert_eq!(
r.fields["val"],
serde_json::json!("has, commas \"and\" quotes")
);
}
// A root directory that does not exist loads as an empty list of item sets.
#[test]
fn load_all_items_missing_root_returns_empty() {
let dir = tempfile::tempdir().unwrap();
let missing = dir.path().join("nope");
let loaded = load_all_items(&missing).unwrap();
assert!(loaded.is_empty());
}
// A catalog directory holding only schema.yaml is ignored by the items loader.
#[test]
fn load_all_items_skips_dirs_without_items_csv() {
let dir = tempfile::tempdir().unwrap();
let cat_dir = dir.path().join("schema_only");
std::fs::create_dir_all(&cat_dir).unwrap();
std::fs::write(
cat_dir.join("schema.yaml"),
"name: schema_only\nfields: []\n",
)
.unwrap();
write_items_csv(dir.path(), "with_items", "id,name\na,foo\n");
let loaded = load_all_items(dir.path()).unwrap();
assert_eq!(loaded.len(), 1);
assert_eq!(loaded[0].catalog_name, "with_items");
}
// Item sets are returned ordered by catalog name.
#[test]
fn load_all_items_sorts_by_catalog_name() {
let dir = tempfile::tempdir().unwrap();
write_items_csv(dir.path(), "zebra", "id\nz1\n");
write_items_csv(dir.path(), "apple", "id\na1\n");
let loaded = load_all_items(dir.path()).unwrap();
assert_eq!(loaded.len(), 2);
assert_eq!(loaded[0].catalog_name, "apple");
assert_eq!(loaded[1].catalog_name, "zebra");
}
// `save_items` rejects a catalog name containing `../`.
#[test]
fn items_save_rejects_path_traversal() {
let dir = tempfile::tempdir().unwrap();
let err = save_items(dir.path(), "../evil", &[]).unwrap_err();
assert!(matches!(err, Error::InvalidFormat { .. }));
}
// Saves a schema (no description) with the given fields so that a later
// `load_items` call finds it next to the items file.
fn write_schema_for_items(
dir: &std::path::Path,
catalog_name: &str,
fields: &[(&str, CatalogFieldType)],
) {
let catalog = Catalog {
name: catalog_name.into(),
description: None,
fields: fields
.iter()
.map(|(n, t)| CatalogField {
name: (*n).into(),
field_type: *t,
})
.collect(),
};
save_schema(dir, &catalog).unwrap();
}
// With a String-typed column, digit-only cells keep their leading zeros.
#[test]
fn schema_aware_parsing_preserves_leading_zeros() {
let dir = tempfile::tempdir().unwrap();
write_schema_for_items(dir.path(), "codes", &[("code", CatalogFieldType::String)]);
write_items_csv(dir.path(), "codes", "id,code\na,007\nb,00123\n");
let loaded = load_items(&dir.path().join("codes/items.csv")).unwrap();
let rows = loaded.rows.as_ref().unwrap();
assert_eq!(rows[0].fields["code"], serde_json::json!("007"));
assert_eq!(rows[1].fields["code"], serde_json::json!("00123"));
}
// With a String-typed column, `true` / `false` cells stay strings.
#[test]
fn schema_aware_parsing_preserves_boolean_looking_strings() {
let dir = tempfile::tempdir().unwrap();
write_schema_for_items(dir.path(), "bools", &[("label", CatalogFieldType::String)]);
write_items_csv(dir.path(), "bools", "id,label\na,true\nb,false\n");
let loaded = load_items(&dir.path().join("bools/items.csv")).unwrap();
let rows = loaded.rows.as_ref().unwrap();
assert_eq!(rows[0].fields["label"], serde_json::json!("true"));
assert_eq!(rows[1].fields["label"], serde_json::json!("false"));
}
// With a Number-typed column, integer and decimal cells become JSON numbers.
#[test]
fn schema_aware_number_field_still_parses_numbers() {
let dir = tempfile::tempdir().unwrap();
write_schema_for_items(dir.path(), "nums", &[("score", CatalogFieldType::Number)]);
write_items_csv(dir.path(), "nums", "id,score\na,42\nb,1.5\n");
let loaded = load_items(&dir.path().join("nums/items.csv")).unwrap();
let rows = loaded.rows.as_ref().unwrap();
assert_eq!(rows[0].fields["score"], serde_json::json!(42));
assert_eq!(rows[1].fields["score"], serde_json::json!(1.5));
}
// Without a schema file the heuristic applies: `007` becomes the number 7.
#[test]
fn no_schema_falls_back_to_heuristic() {
let dir = tempfile::tempdir().unwrap();
write_items_csv(dir.path(), "bare", "id,val\na,007\nb,hello\n");
let loaded = load_items(&dir.path().join("bare/items.csv")).unwrap();
let rows = loaded.rows.as_ref().unwrap();
assert_eq!(rows[0].fields["val"], serde_json::json!(7));
assert_eq!(rows[1].fields["val"], serde_json::json!("hello"));
}
// With a schema present, the hash-only loader and the full loader agree on hashes,
// even though the CSV header here is not in sorted order.
#[test]
fn schema_aware_hash_only_matches_materialize_path() {
let dir = tempfile::tempdir().unwrap();
write_schema_for_items(
dir.path(),
"mixed",
&[
("code", CatalogFieldType::String),
("count", CatalogFieldType::Number),
("active", CatalogFieldType::Boolean),
],
);
write_items_csv(dir.path(), "mixed", "id,code,count,active\na,007,42,true\n");
let csv_path = dir.path().join("mixed/items.csv");
let full = load_items(&csv_path).unwrap();
let hashes_only = load_item_hashes(&csv_path).unwrap();
assert_eq!(full.item_hashes, hashes_only.item_hashes);
let row = &full.rows.as_ref().unwrap()[0];
assert_eq!(row.fields["code"], serde_json::json!("007"));
}
// An empty cell loads as an empty JSON string.
#[test]
fn empty_csv_cell_becomes_empty_string() {
let dir = tempfile::tempdir().unwrap();
write_items_csv(dir.path(), "empty_cell", "id,name\na,\n");
let loaded = load_items(&dir.path().join("empty_cell/items.csv")).unwrap();
let rows = loaded.rows.as_ref().unwrap();
assert_eq!(rows[0].fields["name"], serde_json::json!(""));
}
// Without a schema, the hash-only loader matches the full loader and leaves `rows` unset.
#[test]
fn hash_only_path_matches_full_load_path() {
let dir = tempfile::tempdir().unwrap();
let rows = vec![
make_row(
"af001",
&[
("name", serde_json::json!("atrial")),
("order", serde_json::json!(1)),
("flag", serde_json::json!(true)),
],
),
make_row(
"af002",
&[
("name", serde_json::json!("ventricular")),
("order", serde_json::json!(2)),
("flag", serde_json::json!(false)),
],
),
];
save_items(dir.path(), "test", &rows).unwrap();
let csv_path = dir.path().join("test/items.csv");
let full = load_items(&csv_path).unwrap();
let hashes_only = load_item_hashes(&csv_path).unwrap();
assert_eq!(full.item_hashes, hashes_only.item_hashes);
assert!(hashes_only.rows.is_none());
}
}