Skip to main content

ambers/
metadata.rs

1use indexmap::IndexMap;
2
3use crate::constants::{Alignment, Compression, Measure};
4use crate::variable::MissingValues;
5
/// A value that can be used as a key in value label maps.
///
/// Numeric keys are identified by their exact IEEE 754 bit pattern: equality,
/// hashing and ordering all agree on that identity, so every `f64` (including
/// NaN and the two signed zeros) behaves as a well-formed map key.
#[derive(Debug, Clone)]
pub enum Value {
    /// A numeric key.
    Numeric(f64),
    /// A string key.
    String(String),
}

// Manual Hash/Eq for Value since f64 doesn't implement Hash.
// We use the raw bit pattern for numeric values.
impl std::hash::Hash for Value {
    /// Hashes a variant tag followed by the payload, so a numeric key and a
    /// string key never feed the same byte stream to the hasher.
    fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
        match self {
            Value::Numeric(v) => {
                0_u8.hash(state);
                v.to_bits().hash(state);
            }
            Value::String(s) => {
                1_u8.hash(state);
                s.hash(state);
            }
        }
    }
}

impl PartialEq for Value {
    /// Two numeric values are equal only when their bit patterns match, which
    /// makes NaN equal to itself and distinguishes `0.0` from `-0.0`.
    fn eq(&self, other: &Self) -> bool {
        match (self, other) {
            (Value::Numeric(a), Value::Numeric(b)) => a.to_bits() == b.to_bits(),
            (Value::String(a), Value::String(b)) => a == b,
            _ => false,
        }
    }
}

impl Eq for Value {}

impl PartialOrd for Value {
    /// Always `Some`: the ordering defined by [`Ord`] is total.
    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
        Some(self.cmp(other))
    }
}

impl Ord for Value {
    /// Total order over values: every numeric sorts before every string,
    /// numerics follow the IEEE 754 total order and strings compare
    /// lexicographically.
    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
        use std::cmp::Ordering;

        match (self, other) {
            // `total_cmp` returns `Equal` exactly when the bit patterns match,
            // keeping `cmp` consistent with the bitwise `PartialEq`/`Hash`
            // above. Falling back to `Equal` when `partial_cmp` fails would
            // make NaN "equal" to every number and break transitivity.
            (Value::Numeric(a), Value::Numeric(b)) => a.total_cmp(b),
            (Value::String(a), Value::String(b)) => a.cmp(b),
            // Numeric sorts before String
            (Value::Numeric(_), Value::String(_)) => Ordering::Less,
            (Value::String(_), Value::Numeric(_)) => Ordering::Greater,
        }
    }
}
61
62impl std::fmt::Display for Value {
63    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
64        match self {
65            Value::Numeric(v) => {
66                // Display as integer if it's a whole number
67                if v.fract() == 0.0 && v.is_finite() {
68                    write!(f, "{}", *v as i64)
69                } else {
70                    write!(f, "{v}")
71                }
72            }
73            Value::String(s) => write!(f, "{s}"),
74        }
75    }
76}
77
/// One missing-value rule as exposed through the public API.
///
/// A variable's missing values are described by a list of these entries.
#[derive(Clone, Debug)]
pub enum MissingSpec {
    /// A single discrete numeric value that counts as missing.
    Value(f64),
    /// A range of numeric values that count as missing, bounded by `lo` and
    /// `hi`.
    // NOTE(review): presumably both bounds are inclusive -- confirm against the reader.
    Range { lo: f64, hi: f64 },
    /// A single discrete string value that counts as missing.
    StringValue(String),
}
88
89/// Convert internal MissingValues to public MissingSpec list.
90pub fn missing_to_specs(mv: &MissingValues) -> Vec<MissingSpec> {
91    match mv {
92        MissingValues::None => vec![],
93        MissingValues::DiscreteNumeric(vals) => {
94            vals.iter().map(|&v| MissingSpec::Value(v)).collect()
95        }
96        MissingValues::Range { low, high } => {
97            vec![MissingSpec::Range {
98                lo: *low,
99                hi: *high,
100            }]
101        }
102        MissingValues::RangeAndValue { low, high, value } => {
103            vec![
104                MissingSpec::Range {
105                    lo: *low,
106                    hi: *high,
107                },
108                MissingSpec::Value(*value),
109            ]
110        }
111        MissingValues::DiscreteString(vals) => vals
112            .iter()
113            .map(|v| {
114                MissingSpec::StringValue(
115                    String::from_utf8_lossy(v).trim_end().to_string(),
116                )
117            })
118            .collect(),
119    }
120}
121
122/// Multiple response set definition.
123#[derive(Debug, Clone)]
124pub struct MrSet {
125    pub name: String,
126    pub label: String,
127    pub mr_type: MrType,
128    pub counted_value: Option<String>,
129    pub variables: Vec<String>,
130}
131
/// The kind of a multiple response set (see [`MrSet`]).
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum MrType {
    /// A multiple-dichotomy set.
    MultipleDichotomy,
    /// A multiple-category set.
    MultipleCategory,
}
137
138/// The complete metadata for an SPSS file.
139#[derive(Debug, Clone)]
140pub struct SpssMetadata {
141    // File-level
142    pub file_label: String,
143    pub file_encoding: String,
144    pub compression: Compression,
145    pub creation_time: String,
146    pub modification_time: String,
147    pub notes: Vec<String>,
148    pub number_rows: Option<i64>,
149    pub number_columns: usize,
150    pub file_format: String,
151
152    // Variable names (ordered -- defines Arrow schema column order)
153    pub variable_names: Vec<String>,
154
155    // Variable labels: {name -> label}
156    pub variable_labels: IndexMap<String, String>,
157
158    // Type info
159    pub spss_variable_types: IndexMap<String, String>,
160    pub rust_variable_types: IndexMap<String, String>,
161
162    // Value labels: {var_name -> {value -> label}}
163    pub variable_value_labels: IndexMap<String, IndexMap<Value, String>>,
164
165    // Display properties
166    pub variable_alignment: IndexMap<String, Alignment>,
167    pub variable_storage_width: IndexMap<String, usize>,
168    pub variable_display_width: IndexMap<String, u32>,
169    pub variable_measure: IndexMap<String, Measure>,
170
171    // Missing values
172    pub variable_missing: IndexMap<String, Vec<MissingSpec>>,
173
174    // SPSS-specific
175    pub mr_sets: IndexMap<String, MrSet>,
176    pub weight_variable: Option<String>,
177}
178
179impl SpssMetadata {
180    /// Get a variable label by name.
181    pub fn label(&self, name: &str) -> Option<&str> {
182        self.variable_labels.get(name).map(|s| s.as_str())
183    }
184
185    /// Get value labels for a variable.
186    pub fn value_labels(&self, name: &str) -> Option<&IndexMap<Value, String>> {
187        self.variable_value_labels.get(name)
188    }
189
190    /// Get the SPSS format string for a variable (e.g., "F8.2", "A50").
191    pub fn format(&self, name: &str) -> Option<&str> {
192        self.spss_variable_types.get(name).map(|s| s.as_str())
193    }
194
195    /// Get the measurement level for a variable.
196    pub fn measure(&self, name: &str) -> Option<Measure> {
197        self.variable_measure.get(name).copied()
198    }
199
200}
201
202impl Default for SpssMetadata {
203    fn default() -> Self {
204        SpssMetadata {
205            file_label: String::new(),
206            file_encoding: "UTF-8".to_string(),
207            compression: Compression::None,
208            creation_time: String::new(),
209            modification_time: String::new(),
210            notes: Vec::new(),
211            number_rows: None,
212            number_columns: 0,
213            file_format: "sav".to_string(),
214            variable_names: Vec::new(),
215            variable_labels: IndexMap::new(),
216            spss_variable_types: IndexMap::new(),
217            rust_variable_types: IndexMap::new(),
218            variable_value_labels: IndexMap::new(),
219            variable_alignment: IndexMap::new(),
220            variable_storage_width: IndexMap::new(),
221            variable_display_width: IndexMap::new(),
222            variable_measure: IndexMap::new(),
223            variable_missing: IndexMap::new(),
224            mr_sets: IndexMap::new(),
225            weight_variable: None,
226        }
227    }
228}