use crate::metadata::MrType;
/// One multiple-response (MR) set definition, as parsed from a single
/// `$name=...` line by [`parse_mr_sets`] or [`parse_mr_sets_v2`].
#[derive(Debug, Clone)]
pub struct RawMrSet {
/// Set name: the text between the leading `$` and the first `=`
/// (the `$` itself is not included).
pub name: String,
/// Kind of set: type tags `D` and `E` map to `MrType::MultipleDichotomy`,
/// type tag `C` maps to `MrType::MultipleCategory`.
pub mr_type: MrType,
/// The counted value for `D`/`E` definitions; `None` for `C` definitions.
pub counted_value: Option<String>,
/// The set label. May be empty (declared label length `0`). When the declared
/// label length exceeds the remaining text, this holds the whole trimmed
/// remainder of the line instead.
pub label: String,
/// Whitespace-separated tokens that follow the label. Empty when the label
/// length could not be honoured (see `label`).
pub var_names: Vec<String>,
/// `true` for sets produced by [`parse_mr_sets_v2`], `false` for sets produced
/// by [`parse_mr_sets`].
// NOTE(review): presumably signals that `var_names` are already long variable
// names rather than short ones needing translation — confirm against the caller.
pub uses_long_names: bool,
}
/// Parses every `$name=...` MR set definition found in `data`.
///
/// Each returned [`RawMrSet`] has `uses_long_names` set to `false`. Lines that
/// do not start with `$`, or that fail to parse, are silently skipped.
// NOTE(review): presumably the entry point for the original (short-name) MR
// record of the system file — confirm at the call site.
pub fn parse_mr_sets(data: &[u8]) -> Vec<RawMrSet> {
    const USES_LONG_NAMES: bool = false;
    parse_mr_sets_inner(data, USES_LONG_NAMES)
}
/// Parses every `$name=...` MR set definition found in `data`, marking the
/// results as using long variable names.
///
/// Identical to [`parse_mr_sets`] except that each returned [`RawMrSet`] has
/// `uses_long_names` set to `true`.
// NOTE(review): presumably the entry point for the newer (long-name) MR record
// of the system file — confirm at the call site.
pub fn parse_mr_sets_v2(data: &[u8]) -> Vec<RawMrSet> {
    const USES_LONG_NAMES: bool = true;
    parse_mr_sets_inner(data, USES_LONG_NAMES)
}
/// Shared implementation behind [`parse_mr_sets`] and [`parse_mr_sets_v2`].
///
/// `data` is decoded lossily as UTF-8 and split on `\n`. Each line has
/// surrounding NUL bytes and then surrounding whitespace removed; only lines
/// that begin with `$` are handed to `parse_one_mr_set`. Lines that fail to
/// parse are dropped rather than reported.
///
/// `uses_long_names` is copied verbatim into every resulting [`RawMrSet`].
fn parse_mr_sets_inner(data: &[u8], uses_long_names: bool) -> Vec<RawMrSet> {
    let decoded = String::from_utf8_lossy(data);
    decoded
        .split('\n')
        // Strip NUL padding first, then ordinary whitespace (order matters).
        .map(|raw| raw.trim_matches('\0').trim())
        // An empty line cannot start with `$`, so this also drops blanks.
        .filter(|entry| entry.starts_with('$'))
        .filter_map(|entry| parse_one_mr_set(entry, uses_long_names))
        .collect()
}
/// Parses a single MR set definition of the form `$name=<type>...`.
///
/// Recognised layouts (every `<len>` is a decimal byte count into the decoded
/// text, and the single space after each number is optional):
///
/// * `$name=D<len> <value> <label_len> <label> <var> <var> ...`
/// * `$name=E <flag> <len> <value> <label_len> <label> <var> <var> ...`
/// * `$name=C <label_len> <label> <var> <var> ...`
///
/// `D` and `E` both yield `MrType::MultipleDichotomy` with a counted value;
/// `C` yields `MrType::MultipleCategory` without one. The `E` flag number is
/// read but not interpreted.
///
/// Returns `None` when the leading `$` or the `=` is missing, the type tag is
/// absent or unknown, a required number is missing, or the counted-value
/// length cannot be honoured.
///
/// If the *label* length cannot be honoured (it runs past the end of the text
/// or would cut a multi-byte character in half), the whole trimmed remainder
/// is used as the label and `var_names` is left empty.
///
/// This function never panics on malformed input: all length-driven splits
/// are checked against the string's character boundaries.
fn parse_one_mr_set(text: &str, uses_long_names: bool) -> Option<RawMrSet> {
    let text = text.strip_prefix('$')?;
    let (name, rest) = text.split_once('=')?;

    // Decode the type tag as a whole `char`. The input was decoded lossily, so
    // the first character may be multi-byte (e.g. U+FFFD); slicing one byte
    // off it would panic.
    let mut chars = rest.chars();
    let type_char = chars.next()?;
    let rest = chars.as_str();

    let (mr_type, counted_value, after_cv) = match type_char {
        'D' => {
            let (value, remainder) = parse_counted_value(rest)?;
            (MrType::MultipleDichotomy, Some(value), remainder)
        }
        'E' => {
            // The flag number is consumed but intentionally ignored.
            let (_flag, after_flag) = parse_number(rest.trim_start())?;
            let after_flag = after_flag.strip_prefix(' ').unwrap_or(after_flag);
            let (value, remainder) = parse_counted_value(after_flag)?;
            (MrType::MultipleDichotomy, Some(value), remainder)
        }
        'C' => (MrType::MultipleCategory, None, rest),
        _ => return None,
    };

    let (label_len, after_label_len) = parse_number(after_cv.trim_start())?;
    let after_space = after_label_len.strip_prefix(' ').unwrap_or(after_label_len);

    let (label, var_names): (String, Vec<String>) =
        match split_at_byte_len(after_space, label_len) {
            Some((label, remainder)) => (
                label.to_string(),
                remainder.split_whitespace().map(str::to_string).collect(),
            ),
            // Lenient fallback: the declared length is unusable, so keep the
            // set but treat everything that is left as the label.
            None => (after_space.trim().to_string(), Vec::new()),
        };

    Some(RawMrSet {
        name: name.to_string(),
        mr_type,
        counted_value,
        label,
        var_names,
        uses_long_names,
    })
}

/// Parses `<len>[ ]<value>` from the start of `s`, returning the value and the
/// text that follows it. `None` if the number is missing or `<len>` bytes
/// cannot be taken from what follows.
fn parse_counted_value(s: &str) -> Option<(String, &str)> {
    let (value_len, after_len) = parse_number(s)?;
    let after_space = after_len.strip_prefix(' ').unwrap_or(after_len);
    let (value, remainder) = split_at_byte_len(after_space, value_len)?;
    Some((value.to_string(), remainder))
}

/// Splits `s` after its first `len` bytes without panicking: `None` when `len`
/// exceeds `s.len()` or lands inside a multi-byte character.
fn split_at_byte_len(s: &str, len: usize) -> Option<(&str, &str)> {
    if s.is_char_boundary(len) {
        Some(s.split_at(len))
    } else {
        None
    }
}
/// Splits a leading run of ASCII decimal digits off `s` and parses it.
///
/// Returns the parsed value together with the unconsumed tail of `s`.
/// Returns `None` when `s` does not start with a digit or when the digits do
/// not fit in a `usize`.
fn parse_number(s: &str) -> Option<(usize, &str)> {
    // ASCII digits are single bytes, so counting bytes always stops on a
    // character boundary and `split_at` below cannot panic.
    let digit_count = s.bytes().take_while(u8::is_ascii_digit).count();
    if digit_count == 0 {
        return None;
    }
    let (digits, tail) = s.split_at(digit_count);
    digits.parse::<usize>().ok().map(|value| (value, tail))
}
// Unit tests for the MR set parser. Each test feeds one or more raw
// `$name=...` lines through a public entry point and checks the parsed fields.
#[cfg(test)]
mod tests {
use super::*;
// `D` set: counted value of length 1 (`1`), 16-byte label, three variables.
#[test]
fn test_parse_dichotomy_set() {
let data = b"$AD6=D1 1 16 AD6. QC Autofill ad6r1 ad6r2 ad6r3\n";
let sets = parse_mr_sets(data);
assert_eq!(sets.len(), 1);
assert_eq!(sets[0].name, "AD6");
assert_eq!(sets[0].mr_type, MrType::MultipleDichotomy);
assert_eq!(sets[0].counted_value, Some("1".to_string()));
assert_eq!(sets[0].label, "AD6. QC Autofill");
assert_eq!(sets[0].var_names, vec!["ad6r1", "ad6r2", "ad6r3"]);
}
// `C` set: no counted value, 15-byte label containing a space.
#[test]
fn test_parse_category_set() {
let data = b"$colors=C 15 Favorite Colors RED GREEN BLUE\n";
let sets = parse_mr_sets(data);
assert_eq!(sets.len(), 1);
assert_eq!(sets[0].name, "colors");
assert_eq!(sets[0].mr_type, MrType::MultipleCategory);
assert_eq!(sets[0].counted_value, None);
assert_eq!(sets[0].label, "Favorite Colors");
assert_eq!(sets[0].var_names, vec!["RED", "GREEN", "BLUE"]);
}
// Two newline-separated definitions are returned in input order.
#[test]
fn test_parse_multiple_sets() {
let data = b"$set1=D1 1 9 Label One V1 V2\n$set2=C 9 Label Two V3 V4\n";
let sets = parse_mr_sets(data);
assert_eq!(sets.len(), 2);
assert_eq!(sets[0].name, "set1");
assert_eq!(sets[0].label, "Label One");
assert_eq!(sets[0].var_names, vec!["V1", "V2"]);
assert_eq!(sets[1].name, "set2");
assert_eq!(sets[1].label, "Label Two");
assert_eq!(sets[1].var_names, vec!["V3", "V4"]);
}
// `parse_number` consumes leading digits only and rejects input without any.
#[test]
fn test_parse_number() {
assert_eq!(parse_number("123abc"), Some((123, "abc")));
assert_eq!(parse_number("1 rest"), Some((1, " rest")));
assert_eq!(parse_number("abc"), None);
}
// `D` set whose counted value is two bytes long (`10`); also checks that
// `parse_mr_sets` leaves `uses_long_names` false.
#[test]
fn test_parse_multidigit_counted_value() {
let data = b"$test=D2 10 5 Label V1 V2\n";
let sets = parse_mr_sets(data);
assert_eq!(sets.len(), 1);
assert_eq!(sets[0].counted_value, Some("10".to_string()));
assert_eq!(sets[0].label, "Label");
assert_eq!(sets[0].var_names, vec!["V1", "V2"]);
assert!(!sets[0].uses_long_names);
}
// Same `D` layout through the v2 entry point, which sets `uses_long_names`.
#[test]
fn test_parse_v2_dichotomy() {
let data = b"$q7all=D1 1 11 All of Q7's q7_1 q7_2 q7_3\n";
let sets = parse_mr_sets_v2(data);
assert_eq!(sets.len(), 1);
assert_eq!(sets[0].name, "q7all");
assert_eq!(sets[0].mr_type, MrType::MultipleDichotomy);
assert_eq!(sets[0].counted_value, Some("1".to_string()));
assert_eq!(sets[0].label, "All of Q7's");
assert_eq!(sets[0].var_names, vec!["q7_1", "q7_2", "q7_3"]);
assert!(sets[0].uses_long_names);
}
// `E` set: flag `1` (read but ignored by the parser), 2-byte counted value
// `34`, 13-byte label.
#[test]
fn test_parse_v2_extended_dichotomy() {
let data = b"$d=E 1 2 34 13 third mdgroup k l m\n";
let sets = parse_mr_sets_v2(data);
assert_eq!(sets.len(), 1);
assert_eq!(sets[0].name, "d");
assert_eq!(sets[0].mr_type, MrType::MultipleDichotomy);
assert_eq!(sets[0].counted_value, Some("34".to_string()));
assert_eq!(sets[0].label, "third mdgroup");
assert_eq!(sets[0].var_names, vec!["k", "l", "m"]);
assert!(sets[0].uses_long_names);
}
// `E` set with flag `11`, a non-numeric 6-byte counted value and a
// zero-length label, which must parse as an empty string.
#[test]
fn test_parse_v2_extended_labelsource_varlabel() {
let data = b"$e=E 11 6 choice 0 n o p\n";
let sets = parse_mr_sets_v2(data);
assert_eq!(sets.len(), 1);
assert_eq!(sets[0].name, "e");
assert_eq!(sets[0].mr_type, MrType::MultipleDichotomy);
assert_eq!(sets[0].counted_value, Some("choice".to_string()));
assert_eq!(sets[0].label, "");
assert_eq!(sets[0].var_names, vec!["n", "o", "p"]);
}
// `C` layout through the v2 entry point.
#[test]
fn test_parse_v2_category() {
let data = b"$colors=C 15 Favorite Colors RED GREEN BLUE\n";
let sets = parse_mr_sets_v2(data);
assert_eq!(sets.len(), 1);
assert_eq!(sets[0].name, "colors");
assert_eq!(sets[0].mr_type, MrType::MultipleCategory);
assert!(sets[0].uses_long_names);
}
// One `D`, one `E` and one `C` definition in a single buffer.
#[test]
fn test_parse_v2_mixed() {
let data =
b"$set1=D1 1 5 Label q7_1 q7_2\n$set2=E 1 2 ab 4 Test x y\n$set3=C 4 Cats a b c\n";
let sets = parse_mr_sets_v2(data);
assert_eq!(sets.len(), 3);
assert_eq!(sets[0].name, "set1");
assert_eq!(sets[0].mr_type, MrType::MultipleDichotomy);
assert_eq!(sets[1].name, "set2");
assert_eq!(sets[1].mr_type, MrType::MultipleDichotomy);
assert_eq!(sets[1].counted_value, Some("ab".to_string()));
assert_eq!(sets[2].name, "set3");
assert_eq!(sets[2].mr_type, MrType::MultipleCategory);
}
}