1use ndarray::{Array, Ix1};
11use std::collections::HashMap;
12use std::fmt;
13
14use super::masked_array::ArrayError;
16
/// A dynamically typed scalar stored in one field of a record.
///
/// Each variant wraps a single primitive value (or an owned `String`), which
/// lets a record hold values of different types under different field names.
///
/// Equality is variant-sensitive: `Int32(1)` is not equal to `Int64(1)`.
/// Floating-point variants follow IEEE 754 semantics, so a `NaN` payload is
/// never equal to itself.
#[derive(Clone, Debug, PartialEq)]
pub enum FieldValue {
    /// Boolean value.
    Bool(bool),
    /// Signed 8-bit integer.
    Int8(i8),
    /// Signed 16-bit integer.
    Int16(i16),
    /// Signed 32-bit integer.
    Int32(i32),
    /// Signed 64-bit integer.
    Int64(i64),
    /// Unsigned 8-bit integer.
    UInt8(u8),
    /// Unsigned 16-bit integer.
    UInt16(u16),
    /// Unsigned 32-bit integer.
    UInt32(u32),
    /// Unsigned 64-bit integer.
    UInt64(u64),
    /// 32-bit floating-point number.
    Float32(f32),
    /// 64-bit floating-point number.
    Float64(f64),
    /// Owned UTF-8 string.
    String(String),
}
34
35impl From<bool> for FieldValue {
37 fn from(value: bool) -> Self {
38 Self::Bool(value)
39 }
40}
41
42impl From<i8> for FieldValue {
43 fn from(value: i8) -> Self {
44 Self::Int8(value)
45 }
46}
47
48impl From<i16> for FieldValue {
49 fn from(value: i16) -> Self {
50 Self::Int16(value)
51 }
52}
53
54impl From<i32> for FieldValue {
55 fn from(value: i32) -> Self {
56 Self::Int32(value)
57 }
58}
59
60impl From<i64> for FieldValue {
61 fn from(value: i64) -> Self {
62 Self::Int64(value)
63 }
64}
65
66impl From<u8> for FieldValue {
67 fn from(value: u8) -> Self {
68 Self::UInt8(value)
69 }
70}
71
72impl From<u16> for FieldValue {
73 fn from(value: u16) -> Self {
74 Self::UInt16(value)
75 }
76}
77
78impl From<u32> for FieldValue {
79 fn from(value: u32) -> Self {
80 Self::UInt32(value)
81 }
82}
83
84impl From<u64> for FieldValue {
85 fn from(value: u64) -> Self {
86 Self::UInt64(value)
87 }
88}
89
90impl From<f32> for FieldValue {
91 fn from(value: f32) -> Self {
92 Self::Float32(value)
93 }
94}
95
96impl From<f64> for FieldValue {
97 fn from(value: f64) -> Self {
98 Self::Float64(value)
99 }
100}
101
102impl From<&str> for FieldValue {
103 fn from(value: &str) -> Self {
104 Self::String(value.to_string())
105 }
106}
107
108impl From<String> for FieldValue {
109 fn from(value: String) -> Self {
110 Self::String(value)
111 }
112}
113
114impl fmt::Display for FieldValue {
115 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
116 match self {
117 Self::Bool(v) => write!(f, "{v}"),
118 Self::Int8(v) => write!(f, "{v}"),
119 Self::Int16(v) => write!(f, "{v}"),
120 Self::Int32(v) => write!(f, "{v}"),
121 Self::Int64(v) => write!(f, "{v}"),
122 Self::UInt8(v) => write!(f, "{v}"),
123 Self::UInt16(v) => write!(f, "{v}"),
124 Self::UInt32(v) => write!(f, "{v}"),
125 Self::UInt64(v) => write!(f, "{v}"),
126 Self::Float32(v) => write!(f, "{v}"),
127 Self::Float64(v) => write!(f, "{v}"),
128 Self::String(v) => write!(f, "\"{v}\""),
129 }
130 }
131}
132
/// A single row of named, dynamically typed values.
///
/// Values are looked up by name through `fields`, while `field_names` keeps a
/// separate list of the names used for ordered output.
#[derive(Clone, Debug, Default)]
pub struct Record {
    /// Field values keyed by field name.
    fields: HashMap<String, FieldValue>,

    /// Field names; `add_field` appends a name here the first time it is
    /// added, so the list follows insertion order.
    field_names: Vec<String>,
}
142
143impl Record {
144 #[must_use]
146 pub fn new() -> Self {
147 Self::default()
148 }
149
150 pub fn add_field(&mut self, name: &str, value: FieldValue) {
152 if !self.fields.contains_key(name) {
153 self.field_names.push(name.to_string());
154 }
155 self.fields.insert(name.to_string(), value);
156 }
157
158 #[must_use]
160 pub fn get_field(&self, name: &str) -> Option<&FieldValue> {
161 self.fields.get(name)
162 }
163
164 pub fn get_field_mut(&mut self, name: &str) -> Option<&mut FieldValue> {
166 self.fields.get_mut(name)
167 }
168
169 #[must_use]
171 pub fn num_fields(&self) -> usize {
172 self.fields.len()
173 }
174
175 #[must_use]
177 #[allow(clippy::missing_const_for_fn)]
178 pub fn field_names(&self) -> &[String] {
179 &self.field_names
180 }
181
182 #[must_use]
184 pub fn pprint(&self) -> String {
185 let mut result = String::new();
186
187 let max_name_len = self
188 .field_names
189 .iter()
190 .map(std::string::String::len)
191 .max()
192 .unwrap_or(0);
193
194 for name in &self.field_names {
195 if let Some(value) = self.fields.get(name) {
196 use std::fmt::Write;
197 let _ = writeln!(&mut result, "{name:<max_name_len$}: {value}");
198 }
199 }
200
201 result
202 }
203}
204
205impl fmt::Display for Record {
206 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
207 write!(
208 f,
209 "({})",
210 self.field_names
211 .iter()
212 .filter_map(|name| self.fields.get(name).map(|v| format!("{name}: {v}")))
213 .collect::<Vec<_>>()
214 .join(", ")
215 )
216 }
217}
218
/// A one-dimensional collection of [`Record`]s with a common set of field
/// names (checked when the array is built through `new`).
#[derive(Clone, Debug)]
pub struct RecordArray {
    /// The records (rows) held by the array.
    pub records: Vec<Record>,

    /// Names of the fields; `new` copies them from the first record.
    pub field_names: Vec<String>,

    /// Optional titles, keyed by field name.
    pub field_titles: HashMap<String, String>,

    /// Position of each field name within `field_names`.
    field_indices: HashMap<String, usize>,

    /// Shape of the array; `new` sets it to one dimension equal to the record count.
    shape: Vec<usize>,

    /// Flag exposed through `set_allow_field_attributes` / `allow_field_attributes`;
    /// `new` initializes it to `true`. Nothing else in this file reads it, so
    /// its effect is presumably implemented elsewhere (to be confirmed).
    allow_field_attributes: bool,
}
240
241impl RecordArray {
242 pub fn new(records: Vec<Record>) -> Result<Self, ArrayError> {
247 if records.is_empty() {
248 return Err(ArrayError::ValueError(
249 "Records cannot be empty".to_string(),
250 ));
251 }
252
253 let field_names = records[0].field_names().to_vec();
255
256 for (i, record) in records.iter().enumerate().skip(1) {
258 let record_fields = record.field_names();
259 if record_fields.len() != field_names.len() {
260 return Err(ArrayError::ValueError(format!(
261 "Record {i} has {} fields, but expected {}",
262 record_fields.len(),
263 field_names.len()
264 )));
265 }
266
267 for name in &field_names {
268 if !record_fields.contains(name) {
269 return Err(ArrayError::ValueError(format!(
270 "Record {i} is missing field '{name}'"
271 )));
272 }
273 }
274 }
275
276 let mut field_indices = HashMap::new();
278 for (i, name) in field_names.iter().enumerate() {
279 field_indices.insert(name.clone(), i);
280 }
281
282 let len = records.len();
284
285 Ok(Self {
286 records,
287 field_names,
288 field_titles: HashMap::new(),
289 field_indices,
290 shape: vec![len],
291 allow_field_attributes: true,
292 })
293 }
294
295 pub fn with_titles(
300 records: Vec<Record>,
301 titles: HashMap<String, String>,
302 ) -> Result<Self, ArrayError> {
303 let mut record_array = Self::new(records)?;
304
305 for field_name in titles.keys() {
307 if !record_array.field_indices.contains_key(field_name) {
308 return Err(ArrayError::ValueError(format!(
309 "Cannot add title for non-existent field '{field_name}'"
310 )));
311 }
312 }
313
314 record_array.field_titles = titles;
315 Ok(record_array)
316 }
317
    /// Sets the `allow_field_attributes` flag to `allow`.
    pub const fn set_allow_field_attributes(&mut self, allow: bool) {
        self.allow_field_attributes = allow;
    }
322
    /// Returns the current value of the `allow_field_attributes` flag.
    #[must_use]
    pub const fn allow_field_attributes(&self) -> bool {
        self.allow_field_attributes
    }
328
    /// Returns the stored shape of the array as a slice of dimension sizes.
    #[must_use]
    #[allow(clippy::missing_const_for_fn)]
    pub fn shape(&self) -> &[usize] {
        &self.shape
    }
335
    /// Returns the number of records currently held in `records`.
    #[must_use]
    pub fn num_records(&self) -> usize {
        self.records.len()
    }
341
    /// Returns the record at `index`, or `None` if `index` is out of bounds.
    #[must_use]
    pub fn get_record(&self, index: usize) -> Option<&Record> {
        self.records.get(index)
    }
347
    /// Returns a mutable reference to the record at `index`, or `None` if
    /// `index` is out of bounds.
    pub fn get_record_mut(&mut self, index: usize) -> Option<&mut Record> {
        self.records.get_mut(index)
    }
352
353 pub fn get_field_values(&self, field_name: &str) -> Result<Vec<FieldValue>, ArrayError> {
361 if !self.field_indices.contains_key(field_name) {
362 return Err(ArrayError::ValueError(format!(
363 "Field '{field_name}' not found"
364 )));
365 }
366
367 let values = self
368 .records
369 .iter()
370 .map(|record| record.get_field(field_name).unwrap().clone())
371 .collect();
372
373 Ok(values)
374 }
375
376 #[allow(clippy::cast_precision_loss)]
381 pub fn get_field_as_f64(&self, field_name: &str) -> Result<Array<f64, Ix1>, ArrayError> {
382 let values = self.get_field_values(field_name)?;
383
384 let mut result = Array::zeros(self.records.len());
385
386 for (i, value) in values.iter().enumerate() {
387 let val = match value {
388 FieldValue::Bool(v) => {
389 if *v {
390 1.0
391 } else {
392 0.0
393 }
394 }
395 FieldValue::Int8(v) => f64::from(*v),
396 FieldValue::Int16(v) => f64::from(*v),
397 FieldValue::Int32(v) => f64::from(*v),
398 FieldValue::Int64(v) => *v as f64,
399 FieldValue::UInt8(v) => f64::from(*v),
400 FieldValue::UInt16(v) => f64::from(*v),
401 FieldValue::UInt32(v) => f64::from(*v),
402 FieldValue::UInt64(v) => *v as f64,
403 FieldValue::Float32(v) => f64::from(*v),
404 FieldValue::Float64(v) => *v,
405 FieldValue::String(_) => {
406 return Err(ArrayError::ValueError(format!(
407 "Cannot convert field '{field_name}' of type String to f64"
408 )))
409 }
410 };
411
412 result[i] = val;
413 }
414
415 Ok(result)
416 }
417
418 #[allow(clippy::cast_possible_wrap, clippy::cast_possible_truncation)]
423 pub fn get_field_as_i64(&self, field_name: &str) -> Result<Array<i64, Ix1>, ArrayError> {
424 let values = self.get_field_values(field_name)?;
425
426 let mut result = Array::zeros(self.records.len());
427
428 for (i, value) in values.iter().enumerate() {
429 let val = match value {
430 FieldValue::Bool(v) => i64::from(*v),
431 FieldValue::Int8(v) => i64::from(*v),
432 FieldValue::Int16(v) => i64::from(*v),
433 FieldValue::Int32(v) => i64::from(*v),
434 FieldValue::Int64(v) => *v,
435 FieldValue::UInt8(v) => i64::from(*v),
436 FieldValue::UInt16(v) => i64::from(*v),
437 FieldValue::UInt32(v) => i64::from(*v),
438 FieldValue::UInt64(v) => {
439 if *v > i64::MAX as u64 {
440 return Err(ArrayError::ValueError(format!(
441 "Value {v} in field '{field_name}' is too large for i64"
442 )));
443 }
444 *v as i64
445 }
446 FieldValue::Float32(v) => *v as i64,
447 FieldValue::Float64(v) => *v as i64,
448 FieldValue::String(_) => {
449 return Err(ArrayError::ValueError(format!(
450 "Cannot convert field '{field_name}' of type String to i64"
451 )))
452 }
453 };
454
455 result[i] = val;
456 }
457
458 Ok(result)
459 }
460
461 pub fn get_field_as_string(&self, field_name: &str) -> Result<Vec<String>, ArrayError> {
466 let values = self.get_field_values(field_name)?;
467
468 let mut result = Vec::with_capacity(self.records.len());
469
470 for value in values {
471 let val = match value {
472 FieldValue::Bool(v) => v.to_string(),
473 FieldValue::Int8(v) => v.to_string(),
474 FieldValue::Int16(v) => v.to_string(),
475 FieldValue::Int32(v) => v.to_string(),
476 FieldValue::Int64(v) => v.to_string(),
477 FieldValue::UInt8(v) => v.to_string(),
478 FieldValue::UInt16(v) => v.to_string(),
479 FieldValue::UInt32(v) => v.to_string(),
480 FieldValue::UInt64(v) => v.to_string(),
481 FieldValue::Float32(v) => v.to_string(),
482 FieldValue::Float64(v) => v.to_string(),
483 FieldValue::String(v) => v,
484 };
485
486 result.push(val);
487 }
488
489 Ok(result)
490 }
491
492 pub fn get_field_by_title(&self, title: &str) -> Result<Vec<FieldValue>, ArrayError> {
497 let field_name = self
499 .field_titles
500 .iter()
501 .find_map(|(name, t)| if t == title { Some(name) } else { None })
502 .ok_or_else(|| ArrayError::ValueError(format!("Title '{title}' not found")))?;
503
504 self.get_field_values(field_name)
506 }
507
508 pub fn set_field_value(
513 &mut self,
514 record_idx: usize,
515 field_name: &str,
516 value: FieldValue,
517 ) -> Result<(), ArrayError> {
518 if !self.field_indices.contains_key(field_name) {
520 return Err(ArrayError::ValueError(format!(
521 "Field '{field_name}' not found"
522 )));
523 }
524
525 let record = self.get_record_mut(record_idx).ok_or_else(|| {
527 ArrayError::ValueError(format!("Record index {record_idx} out of bounds"))
528 })?;
529
530 record.add_field(field_name, value);
531 Ok(())
532 }
533
534 pub fn add_field(
539 &mut self,
540 field_name: &str,
541 values: Vec<FieldValue>,
542 ) -> Result<(), ArrayError> {
543 if self.field_indices.contains_key(field_name) {
545 return Err(ArrayError::ValueError(format!(
546 "Field '{field_name}' already exists"
547 )));
548 }
549
550 if values.len() != self.records.len() {
552 return Err(ArrayError::ValueError(format!(
553 "Number of values ({}) doesn't match number of records ({})",
554 values.len(),
555 self.records.len()
556 )));
557 }
558
559 for (i, record) in self.records.iter_mut().enumerate() {
561 record.add_field(field_name, values[i].clone());
562 }
563
564 let new_index = self.field_names.len();
566 self.field_names.push(field_name.to_string());
567 self.field_indices.insert(field_name.to_string(), new_index);
568
569 Ok(())
570 }
571
572 pub fn remove_field(&mut self, field_name: &str) -> Result<(), ArrayError> {
577 if !self.field_indices.contains_key(field_name) {
579 return Err(ArrayError::ValueError(format!(
580 "Field '{field_name}' not found"
581 )));
582 }
583
584 for record in &mut self.records {
586 let new_field_names: Vec<String> = record
588 .field_names
589 .iter()
590 .filter(|name| *name != field_name)
591 .cloned()
592 .collect();
593
594 record.fields.remove(field_name);
596
597 record.field_names = new_field_names;
599 }
600
601 let index_to_remove = self.field_indices[field_name];
603
604 self.field_names.remove(index_to_remove);
606
607 self.field_titles.remove(field_name);
609
610 self.field_indices.clear();
612 for (i, name) in self.field_names.iter().enumerate() {
613 self.field_indices.insert(name.clone(), i);
614 }
615
616 Ok(())
617 }
618
619 pub fn rename_field(&mut self, old_name: &str, new_name: &str) -> Result<(), ArrayError> {
624 if !self.field_indices.contains_key(old_name) {
626 return Err(ArrayError::ValueError(format!(
627 "Field '{old_name}' not found"
628 )));
629 }
630
631 if self.field_indices.contains_key(new_name) {
633 return Err(ArrayError::ValueError(format!(
634 "Field '{new_name}' already exists"
635 )));
636 }
637
638 for record in &mut self.records {
640 if let Some(value) = record.fields.remove(old_name) {
642 record.add_field(new_name, value);
644
645 let old_index = record
647 .field_names
648 .iter()
649 .position(|name| name == old_name)
650 .unwrap();
651 record.field_names[old_index] = new_name.to_string();
652 }
653 }
654
655 let old_index = self.field_indices[old_name];
657 self.field_names[old_index] = new_name.to_string();
658
659 self.field_indices.remove(old_name);
661 self.field_indices.insert(new_name.to_string(), old_index);
662
663 if let Some(title) = self.field_titles.remove(old_name) {
665 self.field_titles.insert(new_name.to_string(), title);
666 }
667
668 Ok(())
669 }
670
671 pub fn view(&self, indices: &[usize]) -> Result<Self, ArrayError> {
676 let mut new_records = Vec::with_capacity(indices.len());
677
678 for &idx in indices {
680 if idx >= self.records.len() {
681 return Err(ArrayError::ValueError(format!(
682 "Index {idx} out of bounds for record array of length {}",
683 self.records.len()
684 )));
685 }
686
687 new_records.push(self.records[idx].clone());
688 }
689
690 let result = Self {
692 records: new_records,
693 field_names: self.field_names.clone(),
694 field_titles: self.field_titles.clone(),
695 field_indices: self.field_indices.clone(),
696 shape: vec![indices.len()],
697 allow_field_attributes: self.allow_field_attributes,
698 };
699
700 Ok(result)
701 }
702
703 pub fn filter<F>(&self, field_name: &str, condition: F) -> Result<Self, ArrayError>
708 where
709 F: Fn(&FieldValue) -> bool,
710 {
711 if !self.field_indices.contains_key(field_name) {
713 return Err(ArrayError::ValueError(format!(
714 "Field '{field_name}' not found"
715 )));
716 }
717
718 let values = self.get_field_values(field_name)?;
720
721 let mut indices = Vec::new();
723 for (i, value) in values.iter().enumerate() {
724 if condition(value) {
725 indices.push(i);
726 }
727 }
728
729 self.view(&indices)
731 }
732
733 pub fn merge(&self, other: &Self) -> Result<Self, ArrayError> {
738 if self.field_names.len() != other.field_names.len() {
740 return Err(ArrayError::ValueError(format!(
741 "Cannot merge record arrays with different number of fields ({} vs {})",
742 self.field_names.len(),
743 other.field_names.len()
744 )));
745 }
746
747 for name in &self.field_names {
748 if !other.field_indices.contains_key(name) {
749 return Err(ArrayError::ValueError(format!(
750 "Field '{name}' not found in the second record array"
751 )));
752 }
753 }
754
755 let mut new_records = Vec::with_capacity(self.records.len() + other.records.len());
757 new_records.extend_from_slice(&self.records);
758 new_records.extend_from_slice(&other.records);
759
760 let result = Self {
762 records: new_records,
763 field_names: self.field_names.clone(),
764 field_titles: self.field_titles.clone(),
765 field_indices: self.field_indices.clone(),
766 shape: vec![self.records.len() + other.records.len()],
767 allow_field_attributes: self.allow_field_attributes,
768 };
769
770 Ok(result)
771 }
772}
773
774#[allow(dead_code)]
776fn compare_field_values(a: &FieldValue, b: &FieldValue) -> Option<std::cmp::Ordering> {
777 match (a, b) {
778 (FieldValue::Bool(a), FieldValue::Bool(b)) => Some(a.cmp(b)),
780 (FieldValue::Int8(a), FieldValue::Int8(b)) => Some(a.cmp(b)),
781 (FieldValue::Int16(a), FieldValue::Int16(b)) => Some(a.cmp(b)),
782 (FieldValue::Int32(a), FieldValue::Int32(b)) => Some(a.cmp(b)),
783 (FieldValue::Int64(a), FieldValue::Int64(b)) => Some(a.cmp(b)),
784 (FieldValue::UInt8(a), FieldValue::UInt8(b)) => Some(a.cmp(b)),
785 (FieldValue::UInt16(a), FieldValue::UInt16(b)) => Some(a.cmp(b)),
786 (FieldValue::UInt32(a), FieldValue::UInt32(b)) => Some(a.cmp(b)),
787 (FieldValue::UInt64(a), FieldValue::UInt64(b)) => Some(a.cmp(b)),
788 (FieldValue::Float32(a), FieldValue::Float32(b)) => a.partial_cmp(b),
789 (FieldValue::Float64(a), FieldValue::Float64(b)) => a.partial_cmp(b),
790 (FieldValue::String(a), FieldValue::String(b)) => Some(a.cmp(b)),
791
792 (FieldValue::Int8(a), FieldValue::Float32(b)) => (*a as f32).partial_cmp(b),
794 (FieldValue::Int8(a), FieldValue::Float64(b)) => (*a as f64).partial_cmp(b),
795 (FieldValue::Float32(a), FieldValue::Int8(b)) => a.partial_cmp(&(*b as f32)),
796 (FieldValue::Float64(a), FieldValue::Int8(b)) => a.partial_cmp(&(*b as f64)),
797
798 _ => {
800 let type_a = std::any::type_name::<FieldValue>();
801 let type_b = std::any::type_name::<FieldValue>();
802 Some(type_a.cmp(type_b))
803 }
804 }
805}
806
807impl fmt::Display for RecordArray {
808 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
809 writeln!(f, "RecordArray(")?;
810
811 let max_records_to_show = 10;
812 let num_records = self.records.len();
813 let show_all = num_records <= max_records_to_show;
814
815 let records_to_show = if show_all {
816 &self.records[..]
817 } else {
818 let half = max_records_to_show / 2;
819 &self.records[..half]
820 };
821
822 for record in records_to_show {
823 writeln!(f, " {record},")?;
824 }
825
826 if !show_all {
827 writeln!(f, " ...")?;
828
829 let half = max_records_to_show / 2;
830 let remaining = &self.records[num_records - half..];
831
832 for record in remaining {
833 writeln!(f, " {record},")?;
834 }
835 }
836
837 write!(f, ")")
838 }
839}
840
841pub fn record_array_from_arrays(
846 field_names: &[&str],
847 arrays: &[Vec<FieldValue>],
848) -> Result<RecordArray, ArrayError> {
849 if field_names.len() != arrays.len() {
850 return Err(ArrayError::ValueError(format!(
851 "Number of field names ({}) must match number of arrays ({})",
852 field_names.len(),
853 arrays.len()
854 )));
855 }
856
857 if arrays.is_empty() {
858 return Err(ArrayError::ValueError("No arrays provided".to_string()));
859 }
860
861 let num_records = arrays[0].len();
862
863 for (i, array) in arrays.iter().enumerate().skip(1) {
865 if array.len() != num_records {
866 return Err(ArrayError::ValueError(format!(
867 "Array {i} has length {}, but expected {num_records}",
868 array.len()
869 )));
870 }
871 }
872
873 let mut records = Vec::with_capacity(num_records);
875
876 for i in 0..num_records {
877 let mut record = Record::new();
878
879 for (name, array) in field_names.iter().zip(arrays.iter()) {
880 record.add_field(name, array[i].clone());
881 }
882
883 records.push(record);
884 }
885
886 RecordArray::new(records)
887}
888
889pub fn record_array_from_typed_arrays<A, B, C>(
894 field_names: &[&str],
895 arrays: (&[A], &[B], &[C]),
896) -> Result<RecordArray, ArrayError>
897where
898 A: Clone + Into<FieldValue>,
899 B: Clone + Into<FieldValue>,
900 C: Clone + Into<FieldValue>,
901{
902 if field_names.len() != 3 {
903 return Err(ArrayError::ValueError(format!(
904 "Number of field names ({}) must match number of arrays (3)",
905 field_names.len()
906 )));
907 }
908
909 let a_len = arrays.0.len();
910 let b_len = arrays.1.len();
911 let c_len = arrays.2.len();
912
913 if a_len != b_len || a_len != c_len {
915 return Err(ArrayError::ValueError(format!(
916 "Arrays have different lengths: {a_len}, {b_len}, {c_len}"
917 )));
918 }
919
920 let mut records = Vec::with_capacity(a_len);
922
923 for i in 0..a_len {
924 let mut record = Record::new();
925
926 record.add_field(field_names[0], arrays.0[i].clone().into());
927 record.add_field(field_names[1], arrays.1[i].clone().into());
928 record.add_field(field_names[2], arrays.2[i].clone().into());
929
930 records.push(record);
931 }
932
933 RecordArray::new(records)
934}
935
936pub fn record_array_from_records<A, B, C>(
941 field_names: &[&str],
942 tuples: &[(A, B, C)],
943) -> Result<RecordArray, ArrayError>
944where
945 A: Clone + Into<FieldValue>,
946 B: Clone + Into<FieldValue>,
947 C: Clone + Into<FieldValue>,
948{
949 if field_names.len() != 3 {
950 return Err(ArrayError::ValueError(format!(
951 "Number of field names ({}) must match number of tuple elements (3)",
952 field_names.len()
953 )));
954 }
955
956 let mut records = Vec::with_capacity(tuples.len());
958
959 for tuple in tuples {
960 let mut record = Record::new();
961
962 record.add_field(field_names[0], tuple.0.clone().into());
963 record.add_field(field_names[1], tuple.1.clone().into());
964 record.add_field(field_names[2], tuple.2.clone().into());
965
966 records.push(record);
967 }
968
969 RecordArray::new(records)
970}