use std::path::Path;
use regex::Regex;
use crate::schema::DatasetSchema;
use crate::validate::Issue;
/// A single validation rule that can be checked against a dataset schema.
///
/// Each variant is evaluated by `Rule::validate`, which reports violations as
/// `Issue` values. Length limits are measured in bytes, not characters.
#[derive(Debug, Clone)]
pub enum Rule {
/// The dataset's domain code must match `regex`.
DatasetNamePattern {
/// Compiled pattern tested against the dataset's domain code.
regex: Regex,
},
/// Every variable name must match `regex`.
VariableNamePattern {
/// Compiled pattern tested against each variable name.
regex: Regex,
},
/// The file stem of the supplied path must equal the domain code, compared
/// ASCII-case-insensitively. Not checked when no path is supplied or the
/// stem is not valid UTF-8.
DatasetNameMatchesFileStem,
/// The domain code and every variable name must be ASCII-only.
RequireAsciiNames,
/// The dataset label (when present) and every variable label must be
/// ASCII-only.
RequireAsciiLabels,
/// NOTE(review): `Rule::validate` produces no issues for this variant;
/// presumably character values are checked elsewhere — confirm.
RequireAsciiCharacterValues,
/// Maximum byte length of the domain code.
DatasetNameMaxBytes(usize),
/// Maximum byte length of each variable name.
VariableNameMaxBytes(usize),
/// Maximum byte length of the dataset label and of each variable label.
/// Non-ASCII labels at or above 80% of this limit are also reported.
LabelMaxBytes(usize),
/// Maximum declared length of each character-typed variable.
CharacterValueMaxBytes(usize),
/// NOTE(review): `Rule::validate` produces no issues for this variant;
/// presumably the file size is checked elsewhere — confirm.
MaxFileSizeGb(f64),
}
impl Rule {
#[must_use]
pub fn dataset_name_pattern(pattern: &str) -> Self {
Self::DatasetNamePattern {
regex: Regex::new(pattern).expect("invalid regex pattern"),
}
}
#[must_use]
pub fn variable_name_pattern(pattern: &str) -> Self {
Self::VariableNamePattern {
regex: Regex::new(pattern).expect("invalid regex pattern"),
}
}
#[must_use]
pub(crate) fn validate(
&self,
plan: &DatasetSchema,
file_path: Option<&Path>,
agency_name: &'static str,
) -> Vec<Issue> {
let mut issues = Vec::new();
match self {
Self::DatasetNamePattern { regex } => {
if !regex.is_match(&plan.domain_code) {
issues.push(Issue::DatasetNamePatternMismatch {
dataset: plan.domain_code.clone(),
agency: agency_name,
pattern: regex.as_str().to_string(),
});
}
}
Self::VariableNamePattern { regex } => {
for var in &plan.variables {
if !regex.is_match(&var.name) {
issues.push(Issue::VariableNamePatternMismatch {
variable: var.name.clone(),
agency: agency_name,
pattern: regex.as_str().to_string(),
});
}
}
}
Self::DatasetNameMatchesFileStem => {
if let Some(path) = file_path
&& let Some(stem) = path.file_stem().and_then(|s| s.to_str())
&& !stem.eq_ignore_ascii_case(&plan.domain_code)
{
issues.push(Issue::DatasetNameFileStemMismatch {
dataset: plan.domain_code.clone(),
stem: stem.to_string(),
});
}
}
Self::RequireAsciiNames => {
if !plan.domain_code.is_ascii() {
issues.push(Issue::NonAsciiDatasetName {
dataset: plan.domain_code.clone(),
});
}
for var in &plan.variables {
if !var.name.is_ascii() {
issues.push(Issue::NonAsciiVariableName {
variable: var.name.clone(),
});
}
}
}
Self::RequireAsciiLabels => {
if let Some(ref label) = plan.dataset_label
&& !label.is_ascii()
{
issues.push(Issue::NonAsciiDatasetLabel {
dataset: plan.domain_code.clone(),
});
}
for var in &plan.variables {
if !var.label.is_ascii() {
issues.push(Issue::NonAsciiVariableLabel {
variable: var.name.clone(),
});
}
}
}
Self::RequireAsciiCharacterValues => {
}
Self::DatasetNameMaxBytes(max) => {
if plan.domain_code.len() > *max {
issues.push(Issue::AgencyDatasetNameTooLong {
dataset: plan.domain_code.clone(),
max: *max,
actual: plan.domain_code.len(),
});
}
}
Self::VariableNameMaxBytes(max) => {
for var in &plan.variables {
if var.name.len() > *max {
issues.push(Issue::AgencyVariableNameTooLong {
variable: var.name.clone(),
max: *max,
actual: var.name.len(),
});
}
}
}
Self::LabelMaxBytes(max) => {
if let Some(ref label) = plan.dataset_label {
let byte_len = label.len();
if byte_len > *max {
issues.push(Issue::AgencyLabelTooLong {
name: plan.domain_code.clone(),
is_dataset: true,
max: *max,
actual: byte_len,
});
} else if !label.is_ascii() && byte_len >= (*max * 80 / 100) {
issues.push(Issue::MultiByteLabelNearLimit {
name: plan.domain_code.clone(),
is_dataset: true,
byte_count: byte_len,
max_bytes: *max,
char_count: label.chars().count(),
});
}
}
for var in &plan.variables {
let byte_len = var.label.len();
if byte_len > *max {
issues.push(Issue::AgencyLabelTooLong {
name: var.name.clone(),
is_dataset: false,
max: *max,
actual: byte_len,
});
} else if !var.label.is_ascii() && byte_len >= (*max * 80 / 100) {
issues.push(Issue::MultiByteLabelNearLimit {
name: var.name.clone(),
is_dataset: false,
byte_count: byte_len,
max_bytes: *max,
char_count: var.label.chars().count(),
});
}
}
}
Self::CharacterValueMaxBytes(max) => {
for var in &plan.variables {
if var.xpt_type.is_character() && var.length > *max {
issues.push(Issue::CharacterValueLengthExceeded {
variable: var.name.clone(),
length: var.length,
agency: agency_name,
max: *max,
});
}
}
}
Self::MaxFileSizeGb(_max) => {
}
}
issues
}
}