use zer_core::{
record::{FieldValue, Record},
schema::FieldKind,
};
use zer_schema::infer::SchemaInferrer;
/// Path to the `brp_persons.csv` example file, assembled at compile time by
/// appending a relative path to the `CARGO_MANIFEST_DIR` of the package being built.
const BRP_Q1_CSV: &str = concat!(
env!("CARGO_MANIFEST_DIR"),
"/../../data/examples/brp_q1/brp_persons.csv"
);
/// Path to the `sim_subscribers.csv` example file, assembled at compile time by
/// appending a relative path to the `CARGO_MANIFEST_DIR` of the package being built.
const SIM_SNAP1_CSV: &str = concat!(
env!("CARGO_MANIFEST_DIR"),
"/../../data/examples/sim/sim_subscribers.csv"
);
/// Reads the CSV file at `path` and converts every data row into a `Record`.
///
/// Records are numbered sequentially in file order, starting at id 1. For each
/// header column the matching cell is trimmed and inserted under the header
/// name: a cell that is empty after trimming (or missing from the row) becomes
/// `FieldValue::Null`, anything else becomes `FieldValue::Text`.
///
/// # Panics
///
/// Panics if the file cannot be opened, or if the header row or any data row
/// cannot be read. Every panic message names `path` and includes the
/// underlying `csv` error so the real cause is not hidden.
fn load_csv_as_records(path: &str) -> Vec<Record> {
    // Keep the original hint, but append the real error: the failure may be
    // something other than a missing file (e.g. a permissions problem).
    let mut rdr = csv::Reader::from_path(path)
        .unwrap_or_else(|e| panic!("CSV not found at {path}, run data generator first: {e}"));
    // Clone the headers so the reader is no longer borrowed while iterating rows.
    let headers = rdr
        .headers()
        .unwrap_or_else(|e| panic!("failed to read CSV header row of {path}: {e}"))
        .clone();
    let mut records = Vec::new();
    // Pair each row with its 1-based id instead of maintaining a manual counter.
    for (id, result) in (1u64..).zip(rdr.records()) {
        let row =
            result.unwrap_or_else(|e| panic!("failed to read CSV data row {id} of {path}: {e}"));
        let mut r = Record::new(id);
        for (i, header) in headers.iter().enumerate() {
            // A row shorter than the header is treated like an empty cell.
            let v = row.get(i).unwrap_or("").trim();
            r = r.insert(
                header,
                if v.is_empty() {
                    FieldValue::Null
                } else {
                    FieldValue::Text(v.into())
                },
            );
        }
        records.push(r);
    }
    records
}
/// Inference over the BRP example CSV must classify `voornamen` and
/// `achternaam` as `FieldKind::Name`.
#[test]
fn infer_brp_name_fields() {
    let rows = load_csv_as_records(BRP_Q1_CSV);
    let inferred = SchemaInferrer::new().infer(&rows).unwrap();
    // Looks up the inferred kind of a column by name; `None` if the column is absent.
    let kind_for = |column: &str| {
        inferred
            .fields
            .iter()
            .find(|field| field.name == column)
            .map(|field| field.kind)
    };
    let expectations = [
        ("voornamen", "voornamen should be Name"),
        ("achternaam", "achternaam should be Name"),
    ];
    for (column, failure_msg) in expectations {
        assert_eq!(kind_for(column), Some(FieldKind::Name), "{}", failure_msg);
    }
}
/// Inference over the BRP example CSV must classify `geboortedatum` as
/// `FieldKind::Date`.
#[test]
fn infer_brp_date_field() {
    let rows = load_csv_as_records(BRP_Q1_CSV);
    let inferred = SchemaInferrer::new().infer(&rows).unwrap();
    // Inferred kind of the single column under test; `None` if it is absent.
    let dob_kind = inferred
        .fields
        .iter()
        .find(|field| field.name == "geboortedatum")
        .map(|field| field.kind);
    assert_eq!(dob_kind, Some(FieldKind::Date), "geboortedatum should be Date");
}
/// Inference over the BRP example CSV must classify `bsn` as `FieldKind::Id`.
#[test]
fn infer_brp_id_field() {
    let rows = load_csv_as_records(BRP_Q1_CSV);
    let inferred = SchemaInferrer::new().infer(&rows).unwrap();
    // Inferred kind of the single column under test; `None` if it is absent.
    let bsn_kind = inferred
        .fields
        .iter()
        .find(|field| field.name == "bsn")
        .map(|field| field.kind);
    assert_eq!(bsn_kind, Some(FieldKind::Id), "bsn should be Id");
}
/// Inference over the BRP example CSV must classify `straatnaam`,
/// `huisnummer`, `postcode` and `woonplaats` as `FieldKind::Address`.
#[test]
fn infer_brp_address_fields() {
    let rows = load_csv_as_records(BRP_Q1_CSV);
    let inferred = SchemaInferrer::new().infer(&rows).unwrap();
    // Looks up the inferred kind of a column by name; `None` if the column is absent.
    let kind_for = |column: &str| {
        inferred.fields.iter().find_map(|field| {
            if field.name == column {
                Some(field.kind)
            } else {
                None
            }
        })
    };
    // Checked in this order; the first mismatch fails the test.
    let expectations = [
        ("straatnaam", "straatnaam should be Address"),
        ("huisnummer", "huisnummer should be Address"),
        ("postcode", "postcode should be Address"),
        ("woonplaats", "woonplaats should be Address"),
    ];
    for (column, failure_msg) in expectations {
        assert_eq!(
            kind_for(column),
            Some(FieldKind::Address),
            "{}",
            failure_msg
        );
    }
}
/// The schema inferred from the BRP example CSV must report a length of 14.
#[test]
fn infer_brp_produces_all_columns() {
    let rows = load_csv_as_records(BRP_Q1_CSV);
    let inferred = SchemaInferrer::new().infer(&rows).unwrap();
    let field_count = inferred.len();
    assert_eq!(
        field_count, 14,
        "BRP schema should have 14 fields, got {}",
        field_count
    );
}
/// Inference over the SIM example CSV must classify `msisdn` as
/// `FieldKind::Phone`.
#[test]
fn infer_sim_phone_field() {
    let rows = load_csv_as_records(SIM_SNAP1_CSV);
    let inferred = SchemaInferrer::new().infer(&rows).unwrap();
    // Inferred kind of the single column under test; `None` if it is absent.
    let msisdn_kind = inferred
        .fields
        .iter()
        .find(|field| field.name == "msisdn")
        .map(|field| field.kind);
    assert_eq!(msisdn_kind, Some(FieldKind::Phone), "msisdn should be Phone");
}
/// Inference over the SIM example CSV must classify `imsi`, `iccid` and
/// `document_nummer` as `FieldKind::Id`.
#[test]
fn infer_sim_id_fields() {
    let rows = load_csv_as_records(SIM_SNAP1_CSV);
    let inferred = SchemaInferrer::new().infer(&rows).unwrap();
    // Looks up the inferred kind of a column by name; `None` if the column is absent.
    let kind_for = |column: &str| {
        inferred
            .fields
            .iter()
            .find(|field| field.name == column)
            .map(|field| field.kind)
    };
    // Checked in this order; the first mismatch fails the test.
    let expectations = [
        ("imsi", "imsi should be Id"),
        ("iccid", "iccid should be Id"),
        ("document_nummer", "document_nummer should be Id"),
    ];
    for (column, failure_msg) in expectations {
        assert_eq!(kind_for(column), Some(FieldKind::Id), "{}", failure_msg);
    }
}
/// Inference over the SIM example CSV must classify `activatiedatum` and
/// `geboortedatum` as `FieldKind::Date`.
#[test]
fn infer_sim_date_field() {
    let rows = load_csv_as_records(SIM_SNAP1_CSV);
    let inferred = SchemaInferrer::new().infer(&rows).unwrap();
    // Looks up the inferred kind of a column by name; `None` if the column is absent.
    let kind_for = |column: &str| {
        inferred.fields.iter().find_map(|field| {
            if field.name == column {
                Some(field.kind)
            } else {
                None
            }
        })
    };
    // Checked in this order; the first mismatch fails the test.
    let expectations = [
        ("activatiedatum", "activatiedatum should be Date"),
        ("geboortedatum", "geboortedatum should be Date"),
    ];
    for (column, failure_msg) in expectations {
        assert_eq!(kind_for(column), Some(FieldKind::Date), "{}", failure_msg);
    }
}
/// An explicit `override_field("geboortedatum", FieldKind::Id)` must decide
/// the kind reported for `geboortedatum` when inferring over the BRP example CSV.
#[test]
fn override_beats_inference_on_real_data() {
    let rows = load_csv_as_records(BRP_Q1_CSV);
    let inferred = SchemaInferrer::new()
        .override_field("geboortedatum", FieldKind::Id)
        .infer(&rows)
        .unwrap();
    // The column must exist in the schema; unwrapping fails the test otherwise.
    let dob_kind = inferred
        .fields
        .iter()
        .find(|field| field.name == "geboortedatum")
        .map(|field| field.kind)
        .unwrap();
    assert_eq!(
        dob_kind,
        FieldKind::Id,
        "override must win over heuristics on real BRP data"
    );
}