1use arrow::datatypes::{DataType, Schema, TimeUnit};
2use indexmap::IndexMap;
3
4use crate::constants::{Alignment, Compression, Measure, Role};
5use crate::variable::MissingValues;
6
/// A labelled value: either a number or a string.
///
/// This is the key type of the per-variable label maps stored in
/// `SpssMetadata::variable_value_labels`. Equality, hashing and ordering are
/// implemented by hand elsewhere in this file because the `f64` payload rules
/// out deriving `Eq`, `Hash` and `Ord`.
#[derive(Clone, Debug)]
pub enum Value {
    /// A numeric value.
    Numeric(f64),
    /// A string value.
    String(String),
}
13
14impl std::hash::Hash for Value {
17 fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
18 match self {
19 Value::Numeric(v) => {
20 0_u8.hash(state);
21 v.to_bits().hash(state);
22 }
23 Value::String(s) => {
24 1_u8.hash(state);
25 s.hash(state);
26 }
27 }
28 }
29}
30
31impl PartialEq for Value {
32 fn eq(&self, other: &Self) -> bool {
33 match (self, other) {
34 (Value::Numeric(a), Value::Numeric(b)) => a.to_bits() == b.to_bits(),
35 (Value::String(a), Value::String(b)) => a == b,
36 _ => false,
37 }
38 }
39}
40
41impl Eq for Value {}
42
43impl PartialOrd for Value {
44 fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
45 Some(self.cmp(other))
46 }
47}
48
49impl Ord for Value {
50 fn cmp(&self, other: &Self) -> std::cmp::Ordering {
51 match (self, other) {
52 (Value::Numeric(a), Value::Numeric(b)) => {
53 a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal)
54 }
55 (Value::String(a), Value::String(b)) => a.cmp(b),
56 (Value::Numeric(_), Value::String(_)) => std::cmp::Ordering::Less,
58 (Value::String(_), Value::Numeric(_)) => std::cmp::Ordering::Greater,
59 }
60 }
61}
62
63impl std::fmt::Display for Value {
64 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
65 match self {
66 Value::Numeric(v) => {
67 if v.fract() == 0.0 && v.is_finite() {
69 write!(f, "{}", *v as i64)
70 } else {
71 write!(f, "{v}")
72 }
73 }
74 Value::String(s) => write!(f, "{s}"),
75 }
76 }
77}
78
/// One user-facing declaration of a missing value.
///
/// A list of these is converted to and from the crate's `MissingValues`
/// representation by `specs_to_missing` and `missing_to_specs`.
#[derive(Clone, Debug)]
pub enum MissingSpec {
    /// A single numeric value treated as missing.
    Value(f64),
    /// A numeric range treated as missing, given by its two bounds.
    Range {
        /// Lower bound of the range.
        lo: f64,
        /// Upper bound of the range.
        hi: f64,
    },
    /// A single string value treated as missing.
    StringValue(String),
}
89
90pub fn specs_to_missing(specs: &[MissingSpec]) -> MissingValues {
92 if specs.is_empty() {
93 return MissingValues::None;
94 }
95 let mut ranges: Vec<(f64, f64)> = Vec::new();
97 let mut discrete_f64: Vec<f64> = Vec::new();
98 let mut discrete_str: Vec<Vec<u8>> = Vec::new();
99 for spec in specs {
100 match spec {
101 MissingSpec::Range { lo, hi } => ranges.push((*lo, *hi)),
102 MissingSpec::Value(v) => discrete_f64.push(*v),
103 MissingSpec::StringValue(s) => {
104 let mut bytes = s.as_bytes().to_vec();
105 bytes.resize(8, b' ');
106 discrete_str.push(bytes);
107 }
108 }
109 }
110 if !discrete_str.is_empty() {
111 return MissingValues::DiscreteString(discrete_str);
112 }
113 if let Some((lo, hi)) = ranges.first() {
114 if let Some(&val) = discrete_f64.first() {
115 return MissingValues::RangeAndValue {
116 low: *lo,
117 high: *hi,
118 value: val,
119 };
120 }
121 return MissingValues::Range {
122 low: *lo,
123 high: *hi,
124 };
125 }
126 MissingValues::DiscreteNumeric(discrete_f64)
127}
128
129pub fn missing_to_specs(mv: &MissingValues) -> Vec<MissingSpec> {
131 match mv {
132 MissingValues::None => vec![],
133 MissingValues::DiscreteNumeric(vals) => {
134 vals.iter().map(|&v| MissingSpec::Value(v)).collect()
135 }
136 MissingValues::Range { low, high } => {
137 vec![MissingSpec::Range {
138 lo: *low,
139 hi: *high,
140 }]
141 }
142 MissingValues::RangeAndValue { low, high, value } => {
143 vec![
144 MissingSpec::Range {
145 lo: *low,
146 hi: *high,
147 },
148 MissingSpec::Value(*value),
149 ]
150 }
151 MissingValues::DiscreteString(vals) => vals
152 .iter()
153 .map(|v| MissingSpec::StringValue(String::from_utf8_lossy(v).trim_end().to_string()))
154 .collect(),
155 }
156}
157
158#[derive(Debug, Clone)]
160pub struct MrSet {
161 pub name: String,
162 pub label: String,
163 pub mr_type: MrType,
164 pub counted_value: Option<String>,
165 pub variables: Vec<String>,
166}
167
/// Kind of a multiple-response set (see `MrSet::mr_type`).
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum MrType {
    /// Multiple-dichotomy set.
    MultipleDichotomy,
    /// Multiple-category set.
    MultipleCategory,
}
173
174#[derive(Debug, Clone)]
176pub struct SpssMetadata {
177 pub file_label: String,
179 pub file_encoding: String,
180 pub compression: Compression,
181 pub creation_time: String,
182 pub notes: Vec<String>,
183 pub number_rows: Option<i64>,
184 pub number_columns: usize,
185 pub file_format: String,
186
187 pub variable_names: Vec<String>,
189
190 pub variable_labels: IndexMap<String, String>,
192
193 pub variable_formats: IndexMap<String, String>,
195 pub arrow_data_types: IndexMap<String, String>,
196
197 pub variable_value_labels: IndexMap<String, IndexMap<Value, String>>,
199
200 pub variable_alignments: IndexMap<String, Alignment>,
202 pub variable_storage_widths: IndexMap<String, usize>,
203 pub variable_display_widths: IndexMap<String, u32>,
204 pub variable_measures: IndexMap<String, Measure>,
205
206 pub variable_missing_values: IndexMap<String, Vec<MissingSpec>>,
208
209 pub mr_sets: IndexMap<String, MrSet>,
211 pub variable_roles: IndexMap<String, Role>,
212 pub variable_attributes: IndexMap<String, IndexMap<String, Vec<String>>>,
213 pub weight_variable: Option<String>,
214}
215
216impl SpssMetadata {
217 pub fn label(&self, name: &str) -> Option<&str> {
219 self.variable_labels.get(name).map(|s| s.as_str())
220 }
221
222 pub fn value_labels(&self, name: &str) -> Option<&IndexMap<Value, String>> {
224 self.variable_value_labels.get(name)
225 }
226
227 pub fn format(&self, name: &str) -> Option<&str> {
229 self.variable_formats.get(name).map(|s| s.as_str())
230 }
231
232 pub fn measure(&self, name: &str) -> Option<Measure> {
234 self.variable_measures.get(name).copied()
235 }
236
237 pub fn role(&self, name: &str) -> Option<Role> {
239 self.variable_roles.get(name).copied()
240 }
241
242 pub fn attributes(&self, var_name: &str) -> Option<&IndexMap<String, Vec<String>>> {
244 self.variable_attributes.get(var_name)
245 }
246
247 pub fn attribute(&self, var_name: &str, attr_name: &str) -> Option<&Vec<String>> {
249 self.variable_attributes.get(var_name)?.get(attr_name)
250 }
251
252 pub fn from_arrow_schema(schema: &Schema) -> Self {
254 let mut meta = SpssMetadata {
255 file_encoding: "UTF-8".to_string(),
256 file_format: "sav".to_string(),
257 number_columns: schema.fields().len(),
258 ..Default::default()
259 };
260
261 for field in schema.fields() {
262 let name = field.name().clone();
263 meta.variable_names.push(name.clone());
264
265 let (fmt_str, rust_type, measure, alignment) = match field.data_type() {
266 DataType::Float64 => ("F8.2".to_string(), "f64", Measure::Scale, Alignment::Right),
267 DataType::Int64 | DataType::Int32 | DataType::Int16 | DataType::Int8 => {
268 ("F8.0".to_string(), "f64", Measure::Scale, Alignment::Right)
269 }
270 DataType::Boolean => (
271 "F1.0".to_string(),
272 "f64",
273 Measure::Nominal,
274 Alignment::Right,
275 ),
276 DataType::Date32 => (
277 "DATE11".to_string(),
278 "Date32",
279 Measure::Scale,
280 Alignment::Right,
281 ),
282 DataType::Timestamp(TimeUnit::Microsecond, _) => (
283 "DATETIME23.2".to_string(),
284 "Timestamp[us]",
285 Measure::Scale,
286 Alignment::Right,
287 ),
288 DataType::Duration(TimeUnit::Microsecond) => (
289 "TIME11.2".to_string(),
290 "Duration[us]",
291 Measure::Scale,
292 Alignment::Right,
293 ),
294 DataType::Utf8 | DataType::Utf8View | DataType::LargeUtf8 => (
295 "A255".to_string(),
296 "String",
297 Measure::Nominal,
298 Alignment::Left,
299 ),
300 _ => ("F8.2".to_string(), "f64", Measure::Scale, Alignment::Right),
301 };
302
303 let sw = if let Some(width_str) = fmt_str.strip_prefix('A') {
306 width_str.parse::<usize>().unwrap_or(255)
307 } else {
308 8
309 };
310 meta.variable_formats.insert(name.clone(), fmt_str);
311 meta.arrow_data_types
312 .insert(name.clone(), rust_type.to_string());
313 meta.variable_measures.insert(name.clone(), measure);
314 meta.variable_alignments.insert(name.clone(), alignment);
315 meta.variable_display_widths.insert(name.clone(), 8);
316 meta.variable_storage_widths.insert(name.clone(), sw);
317 }
318
319 meta
320 }
321}
322
323impl Default for SpssMetadata {
324 fn default() -> Self {
325 SpssMetadata {
326 file_label: String::new(),
327 file_encoding: "UTF-8".to_string(),
328 compression: Compression::None,
329 creation_time: String::new(),
330 notes: Vec::new(),
331 number_rows: None,
332 number_columns: 0,
333 file_format: "sav".to_string(),
334 variable_names: Vec::new(),
335 variable_labels: IndexMap::new(),
336 variable_formats: IndexMap::new(),
337 arrow_data_types: IndexMap::new(),
338 variable_value_labels: IndexMap::new(),
339 variable_alignments: IndexMap::new(),
340 variable_storage_widths: IndexMap::new(),
341 variable_display_widths: IndexMap::new(),
342 variable_measures: IndexMap::new(),
343 variable_missing_values: IndexMap::new(),
344 mr_sets: IndexMap::new(),
345 variable_roles: IndexMap::new(),
346 variable_attributes: IndexMap::new(),
347 weight_variable: None,
348 }
349 }
350}
351
/// English three-letter month abbreviations; index 0 is January.
///
/// Used to translate between month numbers and the month token of
/// `dd mmm yy` date strings.
const MONTH_ABBR: [&str; 12] = [
    "Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec",
];
359
360pub(crate) fn format_spss_datetime(date_str: &str, time_str: &str) -> String {
362 let parts: Vec<&str> = date_str.split_whitespace().collect();
363 if parts.len() == 3 {
364 let day: u32 = parts[0].parse().unwrap_or(0);
365 let month = MONTH_ABBR
366 .iter()
367 .position(|&m| m.eq_ignore_ascii_case(parts[1]))
368 .map(|i| i + 1)
369 .unwrap_or(0);
370 let yy: u32 = parts[2].parse().unwrap_or(0);
371 let year = 2000 + yy;
372 if day > 0 && month > 0 {
373 return format!("{year:04}-{month:02}-{day:02} {time_str}");
374 }
375 }
376 format!("{date_str} {time_str}")
377}
378
379pub(crate) fn parse_iso_to_spss_parts(datetime: &str) -> Option<(String, String)> {
382 let (date_part, time_part) = datetime.split_once(' ')?;
383 let segs: Vec<&str> = date_part.split('-').collect();
384 if segs.len() != 3 {
385 return None;
386 }
387 let year: u32 = segs[0].parse().ok()?;
388 let month: usize = segs[1].parse().ok()?;
389 let day: u32 = segs[2].parse().ok()?;
390 if month == 0 || month > 12 {
391 return None;
392 }
393 let yy = year % 100;
394 let date = format!("{:02} {} {:02}", day, MONTH_ABBR[month - 1], yy);
395 Some((date, time_part.to_string()))
396}