/// Extracts a normalized list of labels from free-form text.
///
/// The input is trimmed first. If it then starts with `[`, it is parsed as a
/// JSON array of strings; a parse failure yields no labels at all rather than
/// an error. Otherwise the text is split on newlines and commas.
///
/// Every candidate is trimmed and lowercased. Empty candidates are dropped and
/// duplicates are removed, keeping the first occurrence so the original order
/// is preserved.
///
/// # Returns
///
/// The normalized labels, or a single `"unknown"` entry when nothing usable
/// was found.
pub fn parse_labels(content: &str) -> Vec<String> {
    let body = content.trim();

    let candidates: Vec<String> = if body.starts_with('[') {
        match serde_json::from_str::<Vec<String>>(body) {
            Ok(items) => items,
            // Best effort: malformed JSON is treated as "no labels" and ends
            // up in the "unknown" fallback below.
            Err(_) => Vec::new(),
        }
    } else {
        body.split(['\n', ',']).map(str::to_owned).collect()
    };

    let mut labels: Vec<String> = Vec::new();
    for candidate in &candidates {
        let normalized = candidate.trim().to_lowercase();
        // Keep only non-empty labels that have not been seen yet.
        if !normalized.is_empty() && !labels.contains(&normalized) {
            labels.push(normalized);
        }
    }

    if labels.is_empty() {
        vec!["unknown".to_string()]
    } else {
        labels
    }
}
/// Restricts `labels` to those present in `allowed`, comparing case-insensitively.
///
/// An empty `allowed` slice disables filtering and `labels` is returned
/// untouched. Otherwise each label is kept only when its lowercased form equals
/// the lowercased form of some allowed entry; surviving labels keep their
/// original casing and order.
///
/// # Returns
///
/// The surviving labels, or a single `"unknown"` entry when none survive.
pub fn validate_labels(labels: Vec<String>, allowed: &[String]) -> Vec<String> {
    if allowed.is_empty() {
        return labels;
    }

    // Lowercase the allow-list once so each label is compared against a
    // pre-folded set.
    let permitted: Vec<String> = allowed.iter().map(|entry| entry.to_lowercase()).collect();

    let mut surviving = labels;
    surviving.retain(|label| {
        let folded = label.to_lowercase();
        permitted.iter().any(|entry| *entry == folded)
    });

    if surviving.is_empty() {
        surviving.push("unknown".to_string());
    }
    surviving
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Turns a slice of string literals into the owned `Vec<String>` the
    /// functions under test produce, keeping expectations short.
    fn strings(items: &[&str]) -> Vec<String> {
        items.iter().copied().map(String::from).collect()
    }

    // ---- parse_labels ----

    #[test]
    fn parses_comma_list() {
        let got = parse_labels("receipt, ticket");
        assert_eq!(got, strings(&["receipt", "ticket"]));
    }

    #[test]
    fn parses_newlines_and_trims_and_lowercases() {
        let got = parse_labels("Receipt\n TICKET \n");
        assert_eq!(got, strings(&["receipt", "ticket"]));
    }

    #[test]
    fn parses_json_array() {
        let got = parse_labels("[\"receipt\", \"ticket\"]");
        assert_eq!(got, strings(&["receipt", "ticket"]));
    }

    #[test]
    fn dedups_preserving_order() {
        let got = parse_labels("receipt, ticket, receipt");
        assert_eq!(got, strings(&["receipt", "ticket"]));
    }

    #[test]
    fn empty_becomes_unknown() {
        let got = parse_labels(" ");
        assert_eq!(got, strings(&["unknown"]));
    }

    // ---- validate_labels ----

    #[test]
    fn validate_keeps_in_set_only() {
        let allowed = strings(&["invoice", "receipt", "ticket"]);
        let got = validate_labels(strings(&["receipt", "spam"]), &allowed);
        assert_eq!(got, strings(&["receipt"]));
    }

    #[test]
    fn validate_none_in_set_is_unknown() {
        let allowed = strings(&["invoice"]);
        let got = validate_labels(strings(&["spam"]), &allowed);
        assert_eq!(got, strings(&["unknown"]));
    }

    #[test]
    fn validate_empty_allowed_is_passthrough() {
        let got = validate_labels(strings(&["x", "y"]), &[]);
        assert_eq!(got, strings(&["x", "y"]));
    }
}