pyref_core/
io.rs

1use astrors_fork::io::hdus::{
2    image::{imagehdu::ImageHDU, ImageData},
3    primaryhdu::PrimaryHDU,
4};
5use ndarray::{ArrayBase, Axis, Dim, IxDynImpl, OwnedRepr};
6use polars::prelude::*;
7use std::ops::Mul;
8
9use crate::enums::HeaderValue;
10use crate::errors::FitsLoaderError;
11
12// Find the theta offset between theta and the ccd theta or 2theta
13pub fn theta_offset(theta: f64, ccd_theta: f64) -> f64 {
14    // 2theta = -ccd_theta / 2 - theta rounded to 3 decimal places
15    let ccd_theta = ccd_theta / 2.0;
16    ccd_theta - theta
17}
18
19pub fn q(lam: f64, theta: f64, angle_offset: f64) -> f64 {
20    let theta = theta - angle_offset;
21    match 4.0 * std::f64::consts::PI * theta.to_radians().sin() / lam {
22        q if q < 0.0 => 0.0,
23        q => q,
24    }
25}
26
27pub fn col_from_array(
28    name: PlSmallStr,
29    array: ArrayBase<OwnedRepr<i64>, Dim<IxDynImpl>>,
30) -> Result<Column, PolarsError> {
31    let size0 = array.len_of(Axis(0));
32    let size1 = array.len_of(Axis(1));
33
34    let mut s = Column::new_empty(
35        name,
36        &DataType::List(Box::new(DataType::List(Box::new(DataType::Int64)))),
37    );
38
39    let mut chunked_builder = ListPrimitiveChunkedBuilder::<Int64Type>::new(
40        PlSmallStr::EMPTY,
41        array.len_of(Axis(0)),
42        array.len_of(Axis(1)) * array.len_of(Axis(0)),
43        DataType::Int64,
44    );
45    for row in array.axis_iter(Axis(0)) {
46        match row.as_slice() {
47            Some(row) => chunked_builder.append_slice(row),
48            None => chunked_builder.append_slice(&row.to_owned().into_raw_vec()),
49        }
50    }
51    let new_series = chunked_builder
52        .finish()
53        .into_series()
54        .implode()
55        .unwrap()
56        .into_column();
57    let _ = s.extend(&new_series);
58    let s = s.cast(&DataType::Array(
59        Box::new(DataType::Array(Box::new(DataType::Int32), size1)),
60        size0,
61    ));
62    s
63}
64// ================== CCD Raw Data Processing ============
65pub fn add_calculated_domains(lzf: LazyFrame) -> DataFrame {
66    let h = physical_constants::PLANCK_CONSTANT_IN_EV_PER_HZ;
67    let c = physical_constants::SPEED_OF_LIGHT_IN_VACUUM * 1e10;
68
69    // Calculate lambda and q values in angstrom
70    let lz = lzf
71        // Sort by date first (primary sorting key)
72        .sort(["date"], Default::default())
73        // File name as fallback sorting key
74        .sort(["file_name"], Default::default())
75        .with_columns(&[
76            col("exposure").round(3).alias("exposure"),
77            col("higher_order_suppressor")
78                .round(2)
79                .alias("higher_order_suppressor"),
80            col("horizontal_exit_slit_size")
81                .round(1)
82                .alias("horizontal_exit_slit_size"),
83            col("beamline_energy").round(1).alias("beamline_energy"),
84        ])
85        .with_column(
86            col("beamline_energy")
87                .pow(-1)
88                .mul(lit(h * c))
89                .alias("lambda"),
90        );
91
92    let angle_offset = lz
93        .clone()
94        .filter(col("sample_theta").eq(0.0))
95        .last()
96        .select(&[col("ccd_theta"), col("sample_theta")])
97        .with_column(
98            as_struct(vec![col("sample_theta"), col("ccd_theta")])
99                .map(
100                    move |s| {
101                        let struc = s.struct_()?;
102                        let th_series = struc.field_by_name("sample_theta")?;
103                        let theta = th_series.f64()?;
104                        let ccd_th_series = struc.field_by_name("ccd_theta")?;
105                        let ccd_theta = ccd_th_series.f64()?;
106                        let out: Float64Chunked = theta
107                            .into_iter()
108                            .zip(ccd_theta.iter())
109                            .map(|(theta, ccd_theta)| match (theta, ccd_theta) {
110                                (Some(theta), Some(ccd_theta)) => {
111                                    Some(theta_offset(theta, ccd_theta))
112                                }
113                                _ => Some(0.0),
114                            })
115                            .collect();
116                        Ok(Some(out.into_column()))
117                    },
118                    GetOutput::from_type(DataType::Float64),
119                )
120                .alias("theta_offset"),
121        )
122        .select(&[col("theta_offset")])
123        .collect()
124        .unwrap()
125        .get(0)
126        .unwrap()[0]
127        .try_extract::<f64>()
128        .unwrap_or(0.0); // Add default value if extraction fails
129
130    // get the row cor
131    let lz = lz
132        .with_column(lit(angle_offset).alias("theta_offset"))
133        .with_column(
134            as_struct(vec![col("sample_theta"), col("lambda")])
135                .map(
136                    move |s| {
137                        let struc = s.struct_()?;
138                        let th_series = struc.field_by_name("sample_theta")?;
139                        let theta = th_series.f64()?;
140                        let lam_series = struc.field_by_name("lambda")?;
141                        let lam = lam_series.f64()?;
142
143                        let out: Float64Chunked = theta
144                            .into_iter()
145                            .zip(lam.iter())
146                            .map(|(theta, lam)| match (theta, lam) {
147                                (Some(theta), Some(lam)) => Some(q(lam, theta, angle_offset)),
148                                _ => None,
149                            })
150                            .collect();
151
152                        Ok(Some(out.into_column()))
153                    },
154                    GetOutput::from_type(DataType::Float64),
155                )
156                .alias("q"),
157        );
158    lz.collect().unwrap_or_else(|_| DataFrame::empty())
159}
160
161//
162// ================== CCD Data Loader ==================
163pub fn process_image(img: &ImageHDU) -> Result<Vec<Column>, FitsLoaderError> {
164    let bzero = img
165        .header
166        .get_card("BZERO")
167        .ok_or_else(|| FitsLoaderError::MissingHeaderKey("BZERO".into()))?
168        .value
169        .as_int()
170        .ok_or_else(|| FitsLoaderError::FitsError("BZERO not an integer".into()))?;
171
172    match &img.data {
173        ImageData::I16(image) => {
174            let data = image.map(|&x| i64::from(x as i64 + bzero));
175            Ok(vec![col_from_array("raw".into(), data.clone()).unwrap()])
176        }
177        _ => Err(FitsLoaderError::UnsupportedImageData),
178    }
179}
180
181pub fn process_metadata(
182    hdu: &PrimaryHDU,
183    keys: &Vec<HeaderValue>,
184) -> Result<Vec<Column>, FitsLoaderError> {
185    if keys.is_empty() {
186        // If no specific keys are requested, return all header values
187        Ok(hdu
188            .header
189            .iter()
190            .map(|card| {
191                let name = card.keyword.as_str();
192                let value = card.value.as_float().unwrap_or(0.0);
193                // Convert to snake_case without units
194                let clean_name = to_snake_case(name);
195                Column::new(clean_name.into(), &[value])
196            })
197            .collect())
198    } else {
199        // Process each requested header key
200        let mut columns = Vec::new();
201
202        for key in keys {
203            // Special handling for Beamline Energy
204            if key.hdu() == "Beamline Energy" {
205                // First try to get "Beamline Energy"
206                if let Some(card) = hdu.header.get_card(key.hdu()) {
207                    if let Some(val) = card.value.as_float() {
208                        columns.push(Column::new(key.snake_case_name().into(), &[val]));
209                        continue;
210                    }
211                }
212
213                // Then fall  backto "Beamline Energy Goal" if "Beamline Energy" is not present
214                if let Some(card) = hdu.header.get_card("Beamline Energy Goal") {
215                    if let Some(val) = card.value.as_float() {
216                        columns.push(Column::new(key.snake_case_name().into(), &[val]));
217                        continue;
218                    }
219                }
220
221                // If neither value is available, use a default
222                columns.push(Column::new(key.snake_case_name().into(), &[0.0]));
223                continue;
224            }
225
226            // Special handling for Date header (it's a string value, not a float)
227            if key.hdu() == "DATE" {
228                if let Some(card) = hdu.header.get_card(key.hdu()) {
229                    let val = card.value.to_string();
230                    columns.push(Column::new(key.snake_case_name().into(), &[val]));
231                    continue;
232                }
233                // If DATE is not present, use a default empty string
234                columns.push(Column::new(key.snake_case_name().into(), &["".to_string()]));
235                continue;
236            }
237
238            // For other headers, don't fail if they're missing
239            let val = match hdu.header.get_card(key.hdu()) {
240                Some(card) => card.value.as_float().unwrap_or(1.0),
241                None => 0.0, // Default value for missing headers
242            };
243
244            // Use the snake_case name from the enum
245            columns.push(Column::new(key.snake_case_name().into(), &[val]));
246        }
247
248        Ok(columns)
249    }
250}
251
252/// Convert a header name to snake_case without units
253fn to_snake_case(name: &str) -> String {
254    // First, remove any units in square brackets
255    let name_without_units = name.split(" [").next().unwrap_or(name);
256
257    // Then convert to snake_case
258    let mut result = String::new();
259    let mut previous_was_uppercase = false;
260
261    for (i, c) in name_without_units.chars().enumerate() {
262        if c.is_uppercase() {
263            if i > 0 && !previous_was_uppercase {
264                result.push('_');
265            }
266            result.push(c.to_lowercase().next().unwrap());
267            previous_was_uppercase = true;
268        } else if c.is_lowercase() {
269            result.push(c);
270            previous_was_uppercase = false;
271        } else if c.is_whitespace() {
272            result.push('_');
273            previous_was_uppercase = false;
274        } else if c.is_alphanumeric() {
275            result.push(c);
276            previous_was_uppercase = false;
277        }
278    }
279
280    result.to_lowercase()
281}
282
283pub fn process_file_name(path: std::path::PathBuf) -> Vec<Column> {
284    // Extract just the file name without extension
285    let file_name = path
286        .file_stem()
287        .unwrap()
288        .to_str()
289        .unwrap_or("")
290        .split(" ")
291        .next()
292        .unwrap_or("");
293
294    // Just return the file name directly, without extracting frame numbers or scan IDs
295    vec![Column::new("file_name".into(), vec![file_name])]
296}