use crate::RSLifeResult;
use calamine::{Data, Reader, Xls, open_workbook_auto};
use polars::prelude::*;
use reqwest::blocking::get;
use std::io::Cursor;
pub struct IFOAMortXLS {
pub description: String,
pub dataframe: DataFrame,
}
impl IFOAMortXLS {
pub fn from_xls_file_path_str(file_path: &str, sheet_name: &str) -> RSLifeResult<Self> {
let mut workbook = open_workbook_auto(file_path)?;
let sheet_names = workbook.sheet_names().to_owned();
if !sheet_names.iter().any(|n| n == sheet_name) {
return Err(format!("Sheet '{sheet_name}' not found in workbook").into());
}
let range = workbook.worksheet_range(sheet_name)?;
let info_from_id = get_info_from_id(sheet_name)?;
let structure = info_from_id.0;
data_process(structure, range)
}
pub fn from_url(url: &str) -> RSLifeResult<Self> {
let full_name = url
.split('/')
.next_back()
.ok_or("Invalid URL format, no sheet name found")?;
let id_owned = full_name
.split('-')
.next()
.ok_or("Invalid URL format, no sheet name found")?
.to_uppercase();
let id = id_owned.as_str();
let range = fetch_xls_range_from_url(url, id)?;
data_process(1, range)
}
pub fn from_url_id(id: &str) -> RSLifeResult<Self> {
let info_from_id = get_info_from_id(id)?;
let structure = info_from_id.0;
let url_suffix = info_from_id.1;
let url = format!("https://www.actuaries.org.uk/documents/{url_suffix}");
let id = match structure {
1 => id,
101 => {
return Err(format!(
"{id} is not supported. Use method from_ifoa_builtin instead."
)
.into());
}
_ => return Err(format!("{id} is not supported").into()),
};
let range = fetch_xls_range_from_url(&url, id)?;
data_process(structure, range)
}
pub fn from_custom(id: &str) -> RSLifeResult<Self> {
let info_from_id = get_info_from_id(id)?;
let structure = info_from_id.0;
let url_suffix = info_from_id.1;
let url = format!("https://www.actuaries.org.uk/documents/{url_suffix}");
let sheet_name = match structure {
101 => id.strip_suffix("C20").unwrap(),
_ => return Err(format!("{id} is not supported").into()),
};
let range = fetch_xls_range_from_url(&url, sheet_name)?;
data_process(structure, range)
}
}
fn data_process(structure: u32, range: calamine::Range<Data>) -> RSLifeResult<IFOAMortXLS> {
let (description, df) = match structure {
1 => data_process_01(range),
101 => data_process_101(range), _ => Err(format!("Unsupported structure {structure}.").into()),
}?;
let result = IFOAMortXLS {
description,
dataframe: df,
};
Ok(result)
}
fn data_process_01(range: calamine::Range<Data>) -> RSLifeResult<(String, DataFrame)> {
let description = extract_xls_description(&range).unwrap_or_default();
let headers = extract_xls_headers(&range);
let ncols = headers.len();
let columns = parse_xls_data(&range, ncols)?;
let age_col = &columns[0];
let mut lfs = Vec::new();
for (i, header) in headers.iter().enumerate().skip(1) {
let duration: u32 = header.parse().unwrap_or(0);
let value_col = &columns[i];
let age_col_u32: Vec<u32> = age_col.iter().map(|v| *v as u32).collect();
let df = DataFrame::new(vec![
Series::new("age".into(), age_col_u32).into_column(),
Series::new("qx".into(), value_col.clone()).into_column(),
Series::new("duration".into(), vec![duration; age_col.len()]).into_column(),
])?;
lfs.push(df.lazy());
}
let stacked = concat(&lfs, Default::default())?.collect()?;
let dataframe = if headers.len() == 2 {
stacked.drop("duration")?
} else {
stacked
};
Ok((description, dataframe))
}
fn data_process_101(range: calamine::Range<Data>) -> RSLifeResult<(String, DataFrame)> {
let (description, dataframe) = data_process_01(range)?;
let new_description = format!(
"{description}\nThis is a custom series based on the 92-series base mortality tables with C20 projection."
);
let dataframe = dataframe
.lazy()
.with_columns(vec![
when(col("age").lt(lit(60)))
.then(lit(0.13))
.when(col("age").gt_eq(lit(60)).and(col("age").lt_eq(lit(110))))
.then(lit(1.0) - lit(0.87) * (lit(110.0) - col("age")) / lit(50.0))
.when(col("age").gt_eq(lit(110)))
.then(lit(1.0))
.otherwise(lit(f64::NAN))
.alias("alpha"),
when(col("age").lt(lit(60)))
.then(lit(0.55))
.when(col("age").gt_eq(lit(60)).and(col("age").lt_eq(lit(110))))
.then(
lit(0.55) * (lit(110.0) - col("age")) / lit(50.0)
+ lit(0.29) * (col("age") - lit(60.0)) / lit(50.0),
)
.when(col("age").gt_eq(lit(110)))
.then(lit(0.29))
.otherwise(lit(f64::NAN))
.alias("f"),
])
.with_column(
(col("alpha")
+ (lit(1.0) - col("alpha"))
* (lit(1.0) - col("f")).pow(lit((2020.0 - 1992.0) / 20.0)))
.alias("reduction_factor"),
)
.with_column((col("qx") * col("reduction_factor")).alias("qx_reduced"))
.select(&[col("age"), col("qx_reduced").alias("qx")])
.collect()?;
Ok((new_description, dataframe))
}
fn get_info_from_id(id: &str) -> RSLifeResult<(u32, &str)> {
match id {
"AM80" | "AF80" | "AF80(5)" | "TM80" | "PML80" | "PFL80" | "PMA80" | "PFA80" | "IM80"
| "IF80" | "WL80" | "WA80" => Ok((1, "80-series-base-mortality-tables-complete-set")),
"AM92" | "AF92" | "TM92" | "TF92" | "IML92" | "IFL92" | "IMA92" | "IFA92" | "PML92"
| "PFL92" | "PFA92" | "PMA92" | "WL92" | "WA92" | "RMV92" | "RFV92" => {
Ok((1, "92-series-base-mortality-tables-complete-set"))
}
"AMC00" | "AMS00" | "AMN00" | "AFC00" | "AFS00" | "AFN00" | "TMC00" | "TMS00" | "TMN00"
| "TFC00" | "TFS00" | "TFN00" | "IML00" | "IFL00" | "PNML00" | "PNMA00" | "PEML00"
| "PEMA00" | "PCML00" | "PCMA00" | "PNFL00" | "PNFA00" | "PEFL00" | "PEFA00" | "PCFL00"
| "PCFA00" | "WL00" | "WA00" | "RMD00" | "RMV00" | "RMC00" | "RFD00" | "RFV00"
| "RFC00" | "PPMD00" | "PPMV00" | "PPMC00" | "PPFD00" | "PPFV00" => {
Ok((1, "00-series-base-mortality-tables-complete-set"))
}
"PMA92C20" | "PFA92C20" => Ok((101, "92-series-base-mortality-tables-complete-set")),
_ => Err(format!("Unknown id: {id}").into()),
}
}
fn fetch_xls_range_from_url(url: &str, sheet_name: &str) -> RSLifeResult<calamine::Range<Data>> {
let response = get(url)?;
let bytes = response.bytes()?;
let mut workbook = Xls::new(Cursor::new(bytes))?;
let sheet_names = workbook.sheet_names().to_owned();
if !sheet_names.iter().any(|n| n == sheet_name) {
return Err(format!("Sheet '{sheet_name}' not found in workbook").into());
}
let range = workbook.worksheet_range(sheet_name)?;
Ok(range)
}
fn extract_xls_description(range: &calamine::Range<Data>) -> Option<String> {
range.get((0, 0)).and_then(|cell| match cell {
Data::String(s) => Some(s.trim().to_string()),
Data::Empty => None,
other => Some(other.to_string()),
})
}
fn extract_xls_headers(range: &calamine::Range<Data>) -> Vec<String> {
let mut headers = Vec::new();
let mut col = 0;
loop {
let cell = range.get((2, col)); match cell {
Some(Data::String(s)) if !s.trim().is_empty() => headers.push(s.trim().to_string()),
Some(Data::Empty) | None => break,
Some(other) => headers.push(other.to_string()),
}
col += 1;
}
headers
.into_iter()
.enumerate()
.map(|(i, h)| {
if i == 0 {
"x".to_string()
} else {
let h = h.to_lowercase();
if let Some(num) = h.strip_prefix("duration ") {
num.trim_end_matches('+').trim().to_string()
} else if let Some(num) = h.strip_prefix("durations ") {
num.trim_end_matches('+').trim().to_string()
} else {
h
}
}
})
.collect()
}
fn parse_xls_data(range: &calamine::Range<Data>, ncols: usize) -> RSLifeResult<Vec<Vec<f64>>> {
let mut columns: Vec<Vec<f64>> = vec![Vec::new(); ncols];
let mut row_num = 4; loop {
let mut empty_row = true;
for (col, column) in columns.iter_mut().enumerate().take(ncols) {
let cell = range.get((row_num, col));
let val = match _parse_xls_f64_cell(cell, row_num + 1, &format!("col{col}")) {
Ok(v) => v,
Err(_) => f64::NAN,
};
if !val.is_nan() {
empty_row = false;
}
column.push(val);
}
if empty_row {
for column in columns.iter_mut().take(ncols) {
column.pop();
}
break;
}
row_num += 1;
}
Ok(columns)
}
fn _parse_xls_f64_cell(cell: Option<&Data>, row_num: usize, col_name: &str) -> RSLifeResult<f64> {
match cell {
Some(Data::Float(f)) => Ok(*f),
Some(Data::Int(v)) => Ok(*v as f64),
Some(Data::String(s)) => {
if s.trim().is_empty() {
Ok(f64::NAN)
} else {
s.parse::<f64>().map_err(|_| {
format!("Cannot parse {col_name} '{s}' at row {row_num} as number").into()
})
}
}
Some(Data::Bool(b)) => Ok(if *b { 1.0 } else { 0.0 }),
Some(Data::Empty) => Ok(f64::NAN),
Some(other) => {
Err(format!("Invalid {col_name} cell type {other:?} at row {row_num}").into())
}
None => Err(format!("Missing {col_name} cell at row {row_num}").into()),
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_ifoa_mort_xls_am92() {
let result = IFOAMortXLS::from_url_id("AM92");
match result {
Ok(xls) => {
println!("Description: {}", xls.description);
println!("DataFrame:\n{:?}", xls.dataframe);
}
Err(e) => panic!("Failed to load IFOA XLS: {e}"),
}
}
}