1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
use rayon::prelude::*;
use polars::{prelude::NamedFrom, series::Series};
/// Typed storage for the values of a single table column.
///
/// Each variant is named after the FITS column format (TFORM) type code it
/// stores and wraps a `Vec` of the matching Rust type.
///
/// NOTE(review): the previous comment described this as binary-table data, but
/// the variant set (`I`, `E`, `D`, `A`, `F`) and the text parsing done when
/// writing values look like ASCII-table handling — confirm which one is meant.
#[derive(Debug, PartialEq)]
pub enum ColumnDataBuffer {
    /// Values of an `I` column, held as `i32`.
    I(Vec<i32>),
    /// Values of an `E` column, held as `f32`.
    E(Vec<f32>),
    /// Values of a `D` column, held as `f64`.
    D(Vec<f64>),
    /// Values of an `A` column, held as owned strings.
    A(Vec<String>),
    /// Values of an `F` column, held as `f32`.
    F(Vec<f32>),
}
impl ColumnDataBuffer {
    /// Creates a new `ColumnDataBuffer` for the given FITS column format (TFORM).
    ///
    /// The first non-whitespace character of `tform` selects the variant
    /// (`I`, `E`, `D`, `A` or `F`). Numeric buffers are filled with zeros and
    /// the string buffer with empty strings.
    ///
    /// # Arguments
    /// * `tform` - A string specifying the FITS column format.
    /// * `size` - The number of elements to allocate in the buffer.
    ///
    /// # Panics
    /// Panics if `tform` is empty after trimming, if `size` is negative, or if
    /// `tform` specifies an unsupported data type.
    pub fn new(tform: &str, size: i32) -> Self {
        let type_code = tform
            .trim()
            .chars()
            .next()
            .expect("Wrong data type: TFORM is empty");

        // A plain `size as usize` cast turns a negative size into a huge
        // length; reject it up front with a readable message instead.
        // Fully qualified so it compiles on every edition (`TryFrom` only
        // joined the prelude in 2021).
        let len = <usize as std::convert::TryFrom<i32>>::try_from(size)
            .unwrap_or_else(|_| panic!("Invalid buffer size: {}", size));

        match type_code {
            'I' => ColumnDataBuffer::I(vec![0; len]),
            'E' => ColumnDataBuffer::E(vec![0.0; len]),
            'D' => ColumnDataBuffer::D(vec![0.0; len]),
            'A' => ColumnDataBuffer::A(vec![String::new(); len]),
            'F' => ColumnDataBuffer::F(vec![0.0; len]),
            other => panic!("Wrong data type: {:?}", other),
        }
    }

    /// Computes the maximum length of elements in the buffer.
    ///
    /// # Returns
    /// For numeric variants, the longest `to_string()` rendering of any
    /// element; for the string variant, the longest string length in bytes.
    /// Returns `0` when the buffer is empty.
    pub fn max_len(&self) -> usize {
        let widest = match self {
            ColumnDataBuffer::I(data) => {
                data.par_iter().map(|value| value.to_string().len()).max()
            }
            ColumnDataBuffer::E(data) | ColumnDataBuffer::F(data) => {
                data.par_iter().map(|value| value.to_string().len()).max()
            }
            ColumnDataBuffer::D(data) => {
                data.par_iter().map(|value| value.to_string().len()).max()
            }
            ColumnDataBuffer::A(data) => data.par_iter().map(|text| text.len()).max(),
        };
        widest.unwrap_or(0)
    }

    /// Returns the byte size associated with the buffer's data type.
    ///
    /// # Returns
    /// `4` for `I`, `E` and `F`, `8` for `D`, and `1` for `A`
    /// (presumably one byte per character — confirm against the callers).
    pub fn byte_value(&self) -> usize {
        match self {
            ColumnDataBuffer::I(_) | ColumnDataBuffer::E(_) | ColumnDataBuffer::F(_) => 4,
            ColumnDataBuffer::D(_) => 8,
            ColumnDataBuffer::A(_) => 1,
        }
    }

    /// Returns the byte size of the specified data type.
    ///
    /// # Arguments
    /// * `data_type` - The FITS data type code; must be exactly one of
    ///   `"I"`, `"E"`, `"D"`, `"A"` or `"F"` (no trimming is performed).
    ///
    /// # Returns
    /// The size in bytes of the specified data type.
    ///
    /// # Panics
    /// Panics if the `data_type` is invalid.
    pub fn byte_value_from_str(data_type: &str) -> usize {
        match data_type {
            "I" | "E" | "F" => 4,
            "D" => 8,
            "A" => 1,
            other => panic!("Wrong data type: {:?}", other),
        }
    }

    /// Converts the buffer into a Polars `Series` for analysis or manipulation.
    ///
    /// # Arguments
    /// * `col_name` - The name of the column in the resulting `Series`.
    ///
    /// # Returns
    /// A `Series` named `col_name` built from the data in the buffer.
    pub fn to_series(&self, col_name: &str) -> Series {
        match self {
            ColumnDataBuffer::I(data) => Series::new(col_name, data),
            ColumnDataBuffer::E(data) | ColumnDataBuffer::F(data) => Series::new(col_name, data),
            ColumnDataBuffer::D(data) => Series::new(col_name, data),
            ColumnDataBuffer::A(data) => Series::new(col_name, data),
        }
    }

    /// Removes all elements from the buffer, leaving it with length zero.
    pub fn clear(&mut self) {
        match self {
            ColumnDataBuffer::I(data) => data.clear(),
            ColumnDataBuffer::E(data) | ColumnDataBuffer::F(data) => data.clear(),
            ColumnDataBuffer::D(data) => data.clear(),
            ColumnDataBuffer::A(data) => data.clear(),
        }
    }

    /// Parses a text field and stores it in the buffer at the given index.
    ///
    /// The bytes are decoded as UTF-8 (invalid sequences are replaced) and
    /// surrounding whitespace is trimmed before parsing. Real fields
    /// (`E`, `D`, `F`) also accept a Fortran-style `D` exponent marker
    /// (e.g. `1.5D+03`), which FITS allows but `str::parse` does not.
    ///
    /// # Arguments
    /// * `bytes` - A slice of bytes representing the value to write.
    /// * `data_type` - The data type of the value (`I`, `E`, `D`, `A`, `F`).
    /// * `idx` - The index at which to write the value.
    ///
    /// # Panics
    /// Panics if `idx` is negative or out of bounds, if the data type does not
    /// match the buffer type, or if parsing fails.
    pub fn write_on_idx(&mut self, bytes: &[u8], data_type: char, idx: i64) {
        /// Parses an integer field, panicking with the offending text on failure.
        fn parse_int(text: &str) -> i32 {
            text.parse::<i32>()
                .unwrap_or_else(|err| panic!("Cannot parse {:?} as an integer: {}", text, err))
        }

        /// Parses a real field, accepting `D`/`d` as an exponent marker.
        fn parse_real<T>(text: &str) -> T
        where
            T: std::str::FromStr,
            <T as std::str::FromStr>::Err: std::fmt::Debug,
        {
            if let Ok(value) = text.parse::<T>() {
                return value;
            }
            // Only reached when the plain parse failed, so every input that
            // was accepted before still yields exactly the same value.
            let normalized: String = text
                .chars()
                .map(|c| if c == 'D' || c == 'd' { 'E' } else { c })
                .collect();
            normalized
                .parse::<T>()
                .unwrap_or_else(|err| panic!("Cannot parse {:?} as a real number: {:?}", text, err))
        }

        // Checked conversion: an `as usize` cast would wrap negative indices
        // and silently truncate large ones on 32-bit targets.
        let slot = <usize as std::convert::TryFrom<i64>>::try_from(idx)
            .unwrap_or_else(|_| panic!("Invalid row index: {}", idx));

        let decoded = String::from_utf8_lossy(bytes);
        let text = decoded.trim();

        match (data_type, self) {
            ('I', ColumnDataBuffer::I(data)) => data[slot] = parse_int(text),
            ('E', ColumnDataBuffer::E(data)) => data[slot] = parse_real::<f32>(text),
            ('D', ColumnDataBuffer::D(data)) => data[slot] = parse_real::<f64>(text),
            // Only string columns need an owned copy of the field.
            ('A', ColumnDataBuffer::A(data)) => data[slot] = text.to_string(),
            ('F', ColumnDataBuffer::F(data)) => data[slot] = parse_real::<f32>(text),
            (other, _) => panic!(
                "Wrong data type: {:?} is unsupported or does not match the buffer",
                other
            ),
        }
    }
    // No need for max_len on bintable.
}