use crate::Kinetics::experimental_kinetics::experiment_series_main::{TGAExperiment, TGASeries};
use crate::Kinetics::experimental_kinetics::experiment_series2::UnitedDataset;
use crate::Kinetics::experimental_kinetics::one_experiment_dataset::{
ColumnNature, TGADomainError,
};
use polars::prelude::*;
impl TGASeries {
    /// Resolves which experiments take part in a concatenation.
    ///
    /// * `Some(ids)` - every id is looked up in `exp_map`; the experiments are returned in the
    ///   order the ids were given.
    /// * `None` - every experiment of the series is returned in storage order.
    ///
    /// # Errors
    /// Returns `TGADomainError::InvalidOperation` when an id is not present in `exp_map`, or when
    /// the index stored for an id does not point into `experiments`.
    fn select_experiments<'a>(
        &'a self,
        what_exp_to_take: Option<&[&str]>,
    ) -> Result<Vec<&'a TGAExperiment>, TGADomainError> {
        match what_exp_to_take {
            Some(ids) => ids
                .iter()
                .map(|id| {
                    let idx = self.exp_map.get(*id).ok_or_else(|| {
                        TGADomainError::InvalidOperation(format!(
                            "Experiment with id '{}' was not found",
                            id
                        ))
                    })?;
                    // Checked access: a stale index in `exp_map` is reported as an error
                    // instead of panicking on an out-of-bounds slice index.
                    self.experiments.get(*idx).ok_or_else(|| {
                        TGADomainError::InvalidOperation(format!(
                            "Experiment with id '{}' maps to index {} which is out of range",
                            id, idx
                        ))
                    })
                })
                .collect(),
            None => Ok(self.experiments.iter().collect()),
        }
    }

    /// Stacks the requested columns of the selected experiments into one long frame.
    ///
    /// For every selected experiment the column registered for each requested nature is selected
    /// and renamed to `UnitedDataset::canonical_column_name(nature)`, and a literal `exp_id`
    /// column holding the experiment id is appended. The per-experiment frames are then
    /// concatenated in selection order, and the experiments' metadata is collected in that same
    /// order.
    ///
    /// # Arguments
    /// * `what_exp_to_take` - ids of the experiments to use; `None` means all experiments.
    /// * `what_cols_take` - column natures to extract from every experiment.
    ///
    /// # Errors
    /// Returns `TGADomainError::InvalidOperation` when `what_cols_take` is empty, when an
    /// experiment id cannot be resolved, or when an experiment has no column for a requested
    /// nature. Errors raised by the polars `concat` call are propagated through `?`.
    ///
    /// When no experiment is selected, `UnitedDataset::empty()` is returned.
    pub fn concat_into_vertical_stack(
        &self,
        what_exp_to_take: Option<&[&str]>,
        what_cols_take: Vec<ColumnNature>,
    ) -> Result<UnitedDataset, TGADomainError> {
        if what_cols_take.is_empty() {
            return Err(TGADomainError::InvalidOperation(
                "No column natures were requested".to_string(),
            ));
        }
        let experiments = self.select_experiments(what_exp_to_take)?;
        if experiments.is_empty() {
            return Ok(UnitedDataset::empty());
        }

        let mut frames = Vec::with_capacity(experiments.len());
        let mut meta = Vec::with_capacity(experiments.len());
        for exp in experiments {
            // One select expression per requested nature: source column -> canonical name.
            let mut select_exprs = Vec::with_capacity(what_cols_take.len());
            for nature in &what_cols_take {
                let source_col = exp.dataset.get_column_by_nature(*nature).ok_or_else(|| {
                    TGADomainError::InvalidOperation(format!(
                        "Experiment '{}' has no column with nature {:?}",
                        exp.meta.id, nature
                    ))
                })?;
                select_exprs
                    .push(col(source_col).alias(UnitedDataset::canonical_column_name(*nature)));
            }
            // Tag every row with its experiment id so rows stay attributable after stacking.
            let selected = exp
                .dataset
                .frame
                .clone()
                .select(select_exprs)
                .with_column(lit(exp.meta.id.clone()).alias("exp_id"));
            frames.push(selected);
            meta.push(exp.meta.clone());
        }

        let combined = concat(frames, UnionArgs::default())?;
        Ok(UnitedDataset::new(combined, meta))
    }

    /// Builds a frame with one row per experiment id and a `data` column that aggregates the
    /// requested columns of that experiment as structs.
    ///
    /// The data is first stacked with [`Self::concat_into_vertical_stack`] and then grouped by
    /// `exp_id`. Grouping is order-preserving, so the groups appear in the order in which their
    /// `exp_id` first occurs in the stacked frame, i.e. the order in which the experiments were
    /// selected.
    ///
    /// # Arguments
    /// * `what_exp_to_take` - ids of the experiments to use; `None` means all experiments.
    /// * `what_cols_take` - column natures that become the fields of the struct.
    ///
    /// # Errors
    /// Same as [`Self::concat_into_vertical_stack`]; in particular an empty `what_cols_take` is
    /// rejected there with `TGADomainError::InvalidOperation`.
    pub fn concat_into_polars_struct(
        &self,
        what_exp_to_take: Option<&[&str]>,
        what_cols_take: &[ColumnNature],
    ) -> Result<UnitedDataset, TGADomainError> {
        // Input validation (empty column list, unknown ids) is performed by the stacking step.
        let stacked = self.concat_into_vertical_stack(what_exp_to_take, what_cols_take.to_vec())?;
        let struct_fields: Vec<Expr> = what_cols_take
            .iter()
            .map(|nature| col(UnitedDataset::canonical_column_name(*nature)))
            .collect();
        // `group_by_stable` keeps the groups in first-appearance order. Plain `group_by` gives
        // no ordering guarantee, which made the row order of the result nondeterministic.
        let aggregated = stacked
            .frame
            .clone()
            .group_by_stable([col("exp_id")])
            .agg([as_struct(struct_fields).alias("data")]);
        Ok(UnitedDataset::new(aggregated, stacked.meta))
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::Kinetics::experimental_kinetics::experiment_series_main::TGAExperiment;
    use crate::Kinetics::experimental_kinetics::one_experiment_dataset::TGADataset;
    use crate::Kinetics::experimental_kinetics::testing_mod::VirtualTGA;

    /// Converts one synthetic run into an experiment with the given id and appends it to
    /// `series`.
    fn push_synthetic(series: &mut TGASeries, id: &'static str, source: &VirtualTGA) {
        let dataset = TGADataset::create_from_synthetic_data(source).unwrap();
        series.push(TGAExperiment::new(dataset).with_id(id));
    }

    /// Series fixture: two three-point synthetic experiments, `exp_1` and `exp_2`.
    fn build_series() -> TGASeries {
        let first = VirtualTGA {
            time: vec![0.0, 1.0, 2.0],
            temperature: vec![300.0, 301.0, 302.0],
            mass: vec![10.0, 9.8, 9.6],
        };
        let second = VirtualTGA {
            time: vec![0.0, 1.0, 2.0],
            temperature: vec![310.0, 311.0, 312.0],
            mass: vec![8.0, 7.9, 7.8],
        };

        let mut series = TGASeries::new();
        push_synthetic(&mut series, "exp_1", &first);
        push_synthetic(&mut series, "exp_2", &second);
        series
    }

    /// Time and mass of every fixture experiment, stacked vertically.
    fn stacked_time_and_mass() -> UnitedDataset {
        build_series()
            .concat_into_vertical_stack(None, vec![ColumnNature::Time, ColumnNature::Mass])
            .unwrap()
    }

    /// Time and mass of every fixture experiment, aggregated into a struct column.
    fn struct_time_and_mass() -> UnitedDataset {
        build_series()
            .concat_into_polars_struct(None, &[ColumnNature::Time, ColumnNature::Mass])
            .unwrap()
    }

    #[test]
    fn concat_into_vertical_stack_builds_united_dataset() {
        let united = stacked_time_and_mass();
        let df = united.frame.collect().unwrap();

        // Two experiments with three rows each.
        assert_eq!(df.height(), 6);
        for name in ["time", "mass", "exp_id"] {
            assert!(df.column(name).is_ok());
        }
        assert_eq!(united.meta.len(), 2);
    }

    #[test]
    fn concat_into_polars_struct_builds_struct_column() {
        let united = struct_time_and_mass();
        let df = united.frame.collect().unwrap();

        // One row per experiment id.
        assert_eq!(df.height(), 2);
        assert!(df.column("exp_id").is_ok());
        assert!(df.column("data").is_ok());

        // The aggregated column must be a list whose elements are structs.
        let is_list_of_structs = match df.column("data").unwrap().dtype() {
            DataType::List(inner) => matches!(inner.as_ref(), DataType::Struct(_)),
            _ => false,
        };
        assert!(is_list_of_structs);
    }

    #[test]
    fn materialize_vertical_column_by_nature_returns_values_for_one_id() {
        let mass = stacked_time_and_mass()
            .materialize_vertical_column_by_nature("exp_1", ColumnNature::Mass)
            .unwrap();
        assert_eq!(mass, vec![10.0, 9.8, 9.6]);
    }

    #[test]
    fn materialize_struct_field_by_nature_returns_values_for_one_id() {
        let time = struct_time_and_mass()
            .materialize_struct_field_by_nature("exp_2", ColumnNature::Time)
            .unwrap();
        assert_eq!(time, vec![0.0, 1.0, 2.0]);
    }
}