pyref_core/
loader.rs

1use astrors_fork::fits;
2use astrors_fork::io::hdulist::HDU;
3
4use polars::{lazy::prelude::*, prelude::*};
5use rayon::prelude::*;
6use std::fs;
7use std::path::PathBuf;
8
9use crate::enums::{ExperimentType, HeaderValue};
10use crate::errors::FitsLoaderError;
11use crate::io::{add_calculated_domains, process_file_name, process_image, process_metadata};
12
13/// Reads a single FITS file and converts it to a Polars DataFrame.
14///
15/// # Arguments
16///
17/// * `file_path` - Path to the FITS file to read
18/// * `header_items` - List of header values to extract
19///
20/// # Returns
21///
22/// A `Result` containing either the DataFrame or a `FitsLoaderError`.
23pub fn read_fits(
24    file_path: std::path::PathBuf,
25    header_items: &Vec<HeaderValue>,
26) -> Result<DataFrame, FitsLoaderError> {
27    if file_path.extension().and_then(|ext| ext.to_str()) != Some("fits") {
28        return Err(FitsLoaderError::NoData);
29    }
30
31    let hdul = fits::fromfile(
32        file_path
33            .to_str()
34            .ok_or_else(|| FitsLoaderError::InvalidFileName("Invalid path".into()))?,
35    )?;
36
37    let meta = match hdul.hdus.get(0) {
38        Some(HDU::Primary(hdu)) => process_metadata(hdu, header_items)?,
39        _ => return Err(FitsLoaderError::NoData),
40    };
41
42    let img_data = match hdul.hdus.get(2) {
43        Some(HDU::Image(hdu)) => process_image(hdu)?,
44        _ => return Err(FitsLoaderError::NoData),
45    };
46
47    let names = process_file_name(file_path);
48
49    let mut columns = meta;
50    columns.extend(img_data);
51    columns.extend(names);
52
53    DataFrame::new(columns).map_err(FitsLoaderError::PolarsError)
54}
55
56/// Reads all FITS files in a directory and combines them into a single DataFrame.
57///
58/// # Arguments
59///
60/// * `dir` - Path to the directory containing FITS files
61/// * `header_items` - List of header values to extract
62///
63/// # Returns
64///
65/// A `Result` containing either the combined DataFrame or a `FitsLoaderError`.
66pub fn read_experiment(
67    dir: &str,
68    header_items: &Vec<HeaderValue>,
69) -> Result<DataFrame, FitsLoaderError> {
70    let dir_path = std::path::PathBuf::from(dir);
71
72    if !dir_path.exists() {
73        return Err(FitsLoaderError::NoData);
74    }
75
76    let entries: Vec<_> = fs::read_dir(dir)
77        .map_err(FitsLoaderError::IoError)?
78        .par_bridge()
79        .filter_map(|entry| entry.ok())
80        .filter(|entry| entry.path().extension().and_then(|ext| ext.to_str()) == Some("fits"))
81        .collect();
82
83    let dataframes: Result<Vec<DataFrame>, FitsLoaderError> = entries
84        .par_iter()
85        .map(|entry| read_fits(entry.path(), &header_items))
86        .collect();
87
88    let combined_df = dataframes?
89        .into_par_iter()
90        .reduce_with(|acc, df| acc.vstack(&df).unwrap_or(DataFrame::empty()))
91        .ok_or(FitsLoaderError::NoData)?;
92
93    Ok(add_calculated_domains(combined_df.lazy()))
94}
95
96/// Reads multiple specific FITS files and combines them into a single DataFrame.
97///
98/// # Arguments
99///
100/// * `file_paths` - Vector of paths to the FITS files to read
101/// * `header_items` - List of header values to extract
102///
103/// # Returns
104///
105/// A `Result` containing either the combined DataFrame or a `FitsLoaderError`.
106pub fn read_multiple_fits(
107    file_paths: Vec<PathBuf>,
108    header_items: &Vec<HeaderValue>,
109) -> Result<DataFrame, FitsLoaderError> {
110    if file_paths.is_empty() {
111        return Err(FitsLoaderError::NoData);
112    }
113
114    let dataframes: Result<Vec<DataFrame>, FitsLoaderError> = file_paths
115        .par_iter()
116        .map(|path| read_fits(path.clone(), header_items))
117        .collect();
118
119    let combined_df = dataframes?
120        .into_par_iter()
121        .reduce_with(|acc, df| acc.vstack(&df).unwrap_or(DataFrame::empty()))
122        .ok_or(FitsLoaderError::NoData)?;
123
124    Ok(add_calculated_domains(combined_df.lazy()))
125}
126
127/// Reads FITS files matching a pattern and combines them into a single DataFrame.
128///
129/// # Arguments
130///
131/// * `dir` - Directory containing FITS files
132/// * `pattern` - Glob pattern to match files (e.g., "Y6_refl_*.fits")
133/// * `experiment_type` - Type of experiment
134///
135/// # Returns
136///
137/// A `Result` containing either the combined DataFrame or a `FitsLoaderError`.
138pub fn read_experiment_pattern(
139    dir: &str,
140    pattern: &str,
141    experiment_type: ExperimentType,
142) -> Result<DataFrame, FitsLoaderError> {
143    let dir_path = std::path::PathBuf::from(dir);
144
145    if !dir_path.exists() {
146        return Err(FitsLoaderError::NoData);
147    }
148
149    let header_items = experiment_type.get_keys();
150
151    let entries: Vec<_> = fs::read_dir(dir)
152        .map_err(FitsLoaderError::IoError)?
153        .par_bridge()
154        .filter_map(|entry| entry.ok())
155        .filter(|entry| {
156            let path = entry.path();
157            path.extension().and_then(|ext| ext.to_str()) == Some("fits")
158                && match path.file_name().and_then(|name| name.to_str()) {
159                    Some(name) => glob_match::glob_match(pattern, name),
160                    None => false,
161                }
162        })
163        .map(|entry| entry.path())
164        .collect();
165
166    read_multiple_fits(entries, &header_items)
167}
168
169// Utility test function
170pub fn _load() {
171    let test_path = "C:/Users/hduva/.projects/pyref-ccd/testing/Y6_refl_ 001096 Images/Y6_refl_ 001096 CCD 000.fits";
172    let hdus = ExperimentType::Xrr.get_keys();
173    let data = read_fits(test_path.into(), &hdus).unwrap();
174    println!("{:?}", data);
175}