pyref_core/
io.rs

1use astrors_fork::io::hdus::{
2    image::{imagehdu::ImageHDU, ImageData},
3    primaryhdu::PrimaryHDU,
4};
5use ndarray::{ArrayBase, Axis, Dim, IxDynImpl, OwnedRepr};
6use polars::prelude::*;
7use std::ops::Mul;
8
9use crate::errors::FitsLoaderError;
10
11pub fn q(lam: f64, theta: f64) -> f64 {
12    let theta = theta;
13    match 4.0 * std::f64::consts::PI * theta.to_radians().sin() / lam {
14        q if q < 0.0 => 0.0,
15        q => q,
16    }
17}
18
19pub fn col_from_array(
20    name: PlSmallStr,
21    array: ArrayBase<OwnedRepr<i64>, Dim<IxDynImpl>>,
22) -> Result<Column, PolarsError> {
23    let size0 = array.len_of(Axis(0));
24    let size1 = array.len_of(Axis(1));
25
26    let mut s = Column::new_empty(
27        name,
28        &DataType::List(Box::new(DataType::List(Box::new(DataType::Int64)))),
29    );
30
31    let mut chunked_builder = ListPrimitiveChunkedBuilder::<Int64Type>::new(
32        PlSmallStr::EMPTY,
33        array.len_of(Axis(1)),
34        array.len_of(Axis(1)) * array.len_of(Axis(0)),
35        DataType::Int64,
36    );
37    for col in array.axis_iter(Axis(1)) {
38        match col.as_slice() {
39            Some(col) => chunked_builder.append_slice(col),
40            None => chunked_builder.append_slice(&col.to_owned().into_raw_vec()),
41        }
42    }
43    let new_series = chunked_builder
44        .finish()
45        .into_series()
46        .implode()?
47        .into_column();
48    let _ = s.extend(&new_series);
49    let s = s.cast(&DataType::Array(
50        Box::new(DataType::Array(Box::new(DataType::Int64), size0)),
51        size1,
52    ));
53    s
54}
55// ================== CCD Raw Data Processing ============
56pub fn add_calculated_domains(lzf: LazyFrame) -> DataFrame {
57    let h = physical_constants::PLANCK_CONSTANT_IN_EV_PER_HZ;
58    let c = physical_constants::SPEED_OF_LIGHT_IN_VACUUM * 1e10;
59
60    // Collect schema once to check for column existence
61    let schema = lzf.clone().collect_schema().unwrap_or_default();
62    let has_column = |name: &str| schema.iter().any(|(col_name, _)| col_name == name);
63
64    // Start with basic sorting that won't fail even if columns don't exist
65    let mut lz = lzf;
66
67    // Apply optional sorting only if the columns exist
68    if has_column("DATE") {
69        lz = lz.sort(["DATE"], Default::default());
70    }
71
72    if has_column("file_name") {
73        lz = lz.sort(["file_name"], Default::default());
74    }
75
76    // Conditionally apply column transformations only if the columns exist
77    if has_column("EXPOSURE") {
78        lz = lz.with_column(col("EXPOSURE").round(3).alias("EXPOSURE"));
79    }
80
81    if has_column("Higher Order Suppressor") {
82        lz = lz.with_column(
83            col("Higher Order Suppressor")
84                .round(2)
85                .alias("Higher Order Suppressor"),
86        );
87    }
88
89    if has_column("Horizontal Exit Slit Size") {
90        lz = lz.with_column(
91            col("Horizontal Exit Slit Size")
92                .round(1)
93                .alias("Horizontal Exit Slit Size"),
94        );
95    }
96
97    if has_column("Beamline Energy") {
98        lz = lz.with_column(col("Beamline Energy").round(1).alias("Beamline Energy"));
99
100        // Only calculate Lambda if Beamline Energy exists
101        lz = lz.with_column(
102            col("Beamline Energy")
103                .pow(-1)
104                .mul(lit(h * c))
105                .alias("Lambda"),
106        );
107    }
108
109    // Add Q column if required columns exist
110    lz = lz.with_column(
111        when(
112            col("Sample Theta")
113                .is_not_null()
114                .and(col("Lambda").is_not_null()),
115        )
116        .then(as_struct(vec![col("Sample Theta"), col("Lambda")]).map(
117            move |s| {
118                let struc = s.struct_()?;
119                let th_series = struc.field_by_name("Sample Theta")?;
120                let theta = th_series.f64()?;
121                let lam_series = struc.field_by_name("Lambda")?;
122                let lam = lam_series.f64()?;
123
124                let out: Float64Chunked = theta
125                    .into_iter()
126                    .zip(lam.iter())
127                    .map(|(theta, lam)| match (theta, lam) {
128                        (Some(theta), Some(lam)) => Some(q(lam, theta)),
129                        _ => None,
130                    })
131                    .collect();
132
133                Ok(Some(out.into_column()))
134            },
135            GetOutput::from_type(DataType::Float64),
136        ))
137        .otherwise(lit(NULL))
138        .alias("Q"),
139    );
140
141    // Collect the final DataFrame only once at the end
142    lz.collect().unwrap_or_else(|_| DataFrame::empty())
143}
144
145/// Reads a single FITS file and converts it to a Polars DataFrame.
146///
147/// # Arguments
148///
149/// * `file_path` - Path to the FITS file to read
150/// * `header_items` - List of header values to extract
151///
152/// # Returns
153///
154/// A `Result` containing either the DataFrame or a `FitsLoaderError`.
155pub fn process_image(img: &ImageHDU) -> Result<Vec<Column>, FitsLoaderError> {
156    let bzero = img
157        .header
158        .get_card("BZERO")
159        .ok_or_else(|| FitsLoaderError::MissingHeaderKey("BZERO".into()))?
160        .value
161        .as_int()
162        .ok_or_else(|| FitsLoaderError::FitsError("BZERO not an integer".into()))?;
163
164    match &img.data {
165        ImageData::I16(image) => {
166            let data = image.map(|&x| i64::from(x as i64 + bzero));
167            // Implement row-by-row background subtraction
168            let subtracted = subtract_background(&data);
169            // Locate the index tuple with the maximum value
170            // Find the coordinates of the maximum value in the 2D array
171            let max_coords = {
172                let mut max_coords = (0, 0);
173                let mut max_val = i64::MIN;
174
175                for (idx, &val) in subtracted.indexed_iter() {
176                    if val > max_val {
177                        max_val = val;
178                        max_coords = (idx[0], idx[1]);
179                    }
180                }
181
182                max_coords
183            };
184
185            // Check if the beam is too close to any edge (top, bottom, left, right)
186            let msg = if max_coords.0 < 20
187                || max_coords.0 > (subtracted.len_of(Axis(0)) - 20)
188                || max_coords.1 < 20
189                || max_coords.1 > (subtracted.len_of(Axis(1)) - 20)
190            {
191                "Simple Detection Error: Beam is too close to the edge"
192            } else {
193                ""
194            };
195            // Calculate a simple reflectivity result from the subtracted data
196            let (db_sum, scaled_bg) = { simple_reflectivity(&subtracted, max_coords) };
197
198            Ok(vec![
199                col_from_array("RAW".into(), data.clone()).unwrap(),
200                col_from_array("SUBTRACTED".into(), subtracted.clone()).unwrap(),
201                Column::new("Simple Spot X".into(), vec![max_coords.0 as u64]),
202                Column::new("Simple Spot Y".into(), vec![max_coords.1 as u64]),
203                Column::new(
204                    "Simple Reflectivity".into(),
205                    vec![(db_sum - scaled_bg) as f64],
206                ),
207                Series::new("status".into(), vec![msg.to_string()]).into_column(),
208            ])
209        }
210        _ => Err(FitsLoaderError::UnsupportedImageData),
211    }
212}
213
214fn simple_reflectivity(
215    subtracted: &ArrayBase<OwnedRepr<i64>, Dim<IxDynImpl>>,
216    max_index: (usize, usize),
217) -> (i64, i64) {
218    // Convert max_index to 2D coordinates
219    let beam_y = max_index.0;
220    // Row
221    let beam_x = max_index.1;
222    // Column
223
224    // Define ROI size
225    let roi = 5;
226    // Region of interest size
227
228    // Define ROI boundaries
229    let roi_start_y = beam_y.saturating_sub(roi);
230    let roi_end_y = (beam_y + roi + 1).min(subtracted.len_of(Axis(0)));
231    let roi_start_x = beam_x.saturating_sub(roi);
232    let roi_end_x = (beam_x + roi + 1).min(subtracted.len_of(Axis(1)));
233
234    // Initialize sums and counts
235    let mut db_sum = 0i64;
236    let mut db_count = 0i64;
237    let mut bg_sum = 0i64;
238    let mut bg_count = 0i64;
239
240    // Iterate over all rows and columns
241    for y in 0..subtracted.len_of(Axis(0)) {
242        for x in 0..subtracted.len_of(Axis(1)) {
243            let value = subtracted[[y, x]];
244            if value == 0 {
245                continue;
246            }
247
248            if (roi_start_y <= y && y < roi_end_y) && (roi_start_x <= x && x < roi_end_x) {
249                db_sum += value;
250                db_count += 1;
251            } else {
252                bg_sum += value;
253                bg_count += 1;
254            }
255        }
256    }
257
258    // Handle edge cases
259    if bg_count == 0 || db_sum == 0 {
260        (0, 0)
261    } else {
262        // Scale background sum based on ratio of counts
263        let scaled_bg = (bg_sum * db_count) / bg_count;
264        (db_sum, scaled_bg)
265    }
266}
267
268fn subtract_background(
269    data: &ArrayBase<OwnedRepr<i64>, Dim<IxDynImpl>>,
270) -> ArrayBase<OwnedRepr<i64>, Dim<IxDynImpl>> {
271    // Get a view of the data with 5 pixels sliced from each side
272    let view = data.slice(ndarray::s![5..-5, 5..-5]);
273    let rows = view.len_of(Axis(0));
274    let cols = view.len_of(Axis(1));
275
276    // Extract the left and right columns (first and last 20 columns)
277    let left = view.slice(ndarray::s![.., ..20]);
278    let right = view.slice(ndarray::s![.., (cols - 20)..]);
279
280    // Calculate the sum of left and right regions
281    let left_sum: i64 = left.iter().copied().sum();
282    let right_sum: i64 = right.iter().copied().sum();
283
284    // Create background array to store row means
285    let mut background = ndarray::Array1::zeros(rows);
286
287    // Determine which side to use for background
288    if left_sum < right_sum {
289        // Use right side as background
290        for (i, row) in right.axis_iter(Axis(0)).enumerate() {
291            background[i] = row.iter().copied().sum::<i64>() / row.len() as i64;
292        }
293    } else {
294        // Use left side as background
295        for (i, row) in left.axis_iter(Axis(0)).enumerate() {
296            background[i] = row.iter().copied().sum::<i64>() / row.len() as i64;
297        }
298    }
299
300    // Create a new owned array from the view
301    let mut result = view.to_owned();
302
303    // Subtract background from each row
304    for (i, mut row) in result.axis_iter_mut(Axis(0)).enumerate() {
305        let bg = background[i];
306        for val in row.iter_mut() {
307            *val -= bg;
308        }
309    }
310    result.into_dyn()
311}
312
313pub fn process_metadata(
314    hdu: &PrimaryHDU,
315    keys: &Vec<String>,
316) -> Result<Vec<Column>, FitsLoaderError> {
317    if keys.is_empty() {
318        // If no specific keys are requested, return all header values
319        Ok(hdu
320            .header
321            .iter()
322            .filter(|card| !card.keyword.as_str().to_lowercase().contains("comment"))
323            .map(|card| {
324                let name = card.keyword.as_str();
325                let value = card.value.as_float().unwrap_or(0.0);
326                Column::new(name.into(), &[value])
327            })
328            .collect())
329    } else {
330        // Process each requested header key
331        let mut columns = Vec::new();
332
333        for key in keys {
334            // Special handling for Beamline Energy
335            if key == "Beamline Energy" {
336                // First try to get "Beamline Energy"
337                if let Some(card) = hdu.header.get_card(key) {
338                    if let Some(val) = card.value.as_float() {
339                        columns.push(Column::new(key.into(), &[val]));
340                        continue;
341                    }
342                }
343
344                // Then fall back to "Beamline Energy Goal" if "Beamline Energy" is not present
345                if let Some(card) = hdu.header.get_card("Beamline Energy Goal") {
346                    if let Some(val) = card.value.as_float() {
347                        columns.push(Column::new(key.into(), &[val]));
348                        continue;
349                    }
350                }
351
352                // If neither value is available, use a default
353                columns.push(Column::new(key.into(), &[0.0]));
354                continue;
355            }
356
357            // Special handling for Date header (it's a string value, not a float)
358            if key == "DATE" {
359                if let Some(card) = hdu.header.get_card(key) {
360                    let val = card.value.to_string();
361                    columns.push(Column::new(key.into(), &[val]));
362                    continue;
363                }
364                // If DATE is not present, use a default empty string
365                columns.push(Column::new(key.into(), &["".to_string()]));
366                continue;
367            }
368
369            // For other headers, don't fail if they're missing
370            let val = match hdu.header.get_card(key) {
371                Some(card) => card.value.as_float().unwrap_or(1.0),
372                None => 0.0, // Default value for missing headers
373            };
374
375            // Use the snake_case name from the enum variant
376            columns.push(Column::new(key.into(), &[val]));
377        }
378
379        Ok(columns)
380    }
381}
382
383pub fn process_file_name(path: std::path::PathBuf) -> Vec<Column> {
384    // Extract just the file name without extension
385    let file_name = path.file_stem().unwrap().to_str().unwrap_or("");
386
387    // Just return the file name directly, without extracting frame numbers or scan IDs
388    vec![Column::new("file_name".into(), vec![file_name])]
389}