use crate::error::{PandRSError, Result};
use crate::index::Index;
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
use std::fmt::{Debug, Display};
use std::hash::Hash;
/// A hierarchical (multi-level) index.
///
/// Each row of the index is a tuple with one value per level. The tuples are
/// stored in factorized form: every level keeps a list of values, and each
/// row stores, per level, an integer code pointing into that list.
#[derive(Debug, Clone)]
pub struct MultiIndex<T: Debug + Clone + Eq + Hash + Display> {
    /// Values available at each level; `levels[l]` is the lookup table for level `l`.
    levels: Vec<Vec<T>>,
    /// Per-level row codes; `codes[l][r]` is the position in `levels[l]` of the
    /// value that row `r` holds at level `l` (`-1` is the missing-value marker).
    codes: Vec<Vec<i32>>,
    /// Optional name of each level, in level order.
    names: Vec<Option<String>>,
    /// Reverse lookup from a full row tuple to its row position.
    map: HashMap<Vec<T>, usize>,
}
impl<T> MultiIndex<T>
where
    T: Debug + Clone + Eq + Hash + Display,
{
    /// Builds a `MultiIndex` from per-level value tables and per-level row codes.
    ///
    /// `codes[l][r]` is the position inside `levels[l]` of the value that row `r`
    /// holds at level `l`. When `names` is `None`, every level is left unnamed.
    ///
    /// # Errors
    ///
    /// Returns `PandRSError::Index` when:
    /// - `levels` is empty,
    /// - `levels` and `codes` have different lengths,
    /// - a code is neither `-1` nor a valid position in its level,
    /// - the code vectors do not all have the same length,
    /// - `names` is given but its length differs from the number of levels,
    /// - two rows resolve to the same tuple.
    ///
    /// Returns `PandRSError::NotImplemented` when any code is `-1` (missing value).
    pub fn new(
        levels: Vec<Vec<T>>,
        codes: Vec<Vec<i32>>,
        names: Option<Vec<Option<String>>>,
    ) -> Result<Self> {
        if levels.is_empty() {
            return Err(PandRSError::Index("At least one level is required".into()));
        }
        if levels.len() != codes.len() {
            return Err(PandRSError::Index(
                "The lengths of levels and codes must match".into(),
            ));
        }

        for (level_idx, (level, level_codes)) in levels.iter().zip(&codes).enumerate() {
            for &code in level_codes {
                // -1 is the missing-value sentinel; it is reported separately below.
                if code == -1 {
                    continue;
                }
                // Reject every other negative code as well: casting it to `usize`
                // later would produce a huge index and panic instead of erroring.
                if code < 0 || code as usize >= level.len() {
                    return Err(PandRSError::Index(format!(
                        "Code {} in level {} is out of valid range",
                        code, level_idx
                    )));
                }
            }
        }

        let n_rows = codes.first().map_or(0, |first| first.len());
        if codes.iter().any(|level_codes| level_codes.len() != n_rows) {
            return Err(PandRSError::Index(
                "All levels must have the same number of rows".into(),
            ));
        }

        let names = match names {
            Some(given) if given.len() != levels.len() => {
                return Err(PandRSError::Index(
                    "The number of names must match the number of levels".into(),
                ));
            }
            Some(given) => given,
            None => vec![None; levels.len()],
        };

        let mut map = HashMap::with_capacity(n_rows);
        for row in 0..n_rows {
            let mut row_values = Vec::with_capacity(levels.len());
            for (level, level_codes) in levels.iter().zip(&codes) {
                let code = level_codes[row];
                if code == -1 {
                    return Err(PandRSError::NotImplemented(
                        "Missing values are not supported in the current MultiIndex implementation".into()
                    ));
                }
                // The validation loop above guarantees 0 <= code < level.len().
                row_values.push(level[code as usize].clone());
            }
            // The tuple is moved into the map; a previous entry means a duplicate row.
            if map.insert(row_values, row).is_some() {
                return Err(PandRSError::Index(
                    "Duplicate values are not allowed in MultiIndex".into(),
                ));
            }
        }

        Ok(MultiIndex {
            levels,
            codes,
            names,
            map,
        })
    }

    /// Builds a `MultiIndex` from row tuples, factorizing each level.
    ///
    /// Within a level, values receive codes in order of first appearance.
    ///
    /// # Errors
    ///
    /// Returns `PandRSError::Index` when `tuples` is empty, when the tuples do not
    /// all have the same length, or when a level has more distinct values than an
    /// `i32` code can address. Any error from [`MultiIndex::new`] is propagated
    /// (for example duplicate tuples, or zero-length tuples yielding no levels).
    pub fn from_tuples(tuples: Vec<Vec<T>>, names: Option<Vec<Option<String>>>) -> Result<Self> {
        if tuples.is_empty() {
            return Err(PandRSError::Index("An empty list of tuples was provided".into()));
        }

        let n_levels = tuples[0].len();
        for (i, tuple) in tuples.iter().enumerate() {
            if tuple.len() != n_levels {
                return Err(PandRSError::Index(format!(
                    "All tuples must have the same length. Tuple {} has length {}, but the first tuple has length {}.",
                    i, tuple.len(), n_levels
                )));
            }
        }

        let mut unique_values: Vec<Vec<T>> = vec![Vec::new(); n_levels];
        let mut level_maps: Vec<HashMap<T, i32>> = vec![HashMap::new(); n_levels];
        let mut codes: Vec<Vec<i32>> = vec![vec![-1; tuples.len()]; n_levels];

        for (row_idx, tuple) in tuples.iter().enumerate() {
            // Walk the tuple together with the per-level state it updates.
            let per_level = tuple
                .iter()
                .zip(level_maps.iter_mut())
                .zip(unique_values.iter_mut())
                .zip(codes.iter_mut());
            for (((value, level_map), uniques), level_codes) in per_level {
                let code = match level_map.get(value) {
                    Some(&known) => known,
                    None => {
                        // Guard the narrowing cast below instead of wrapping silently.
                        if uniques.len() >= i32::MAX as usize {
                            return Err(PandRSError::Index(
                                "Too many unique values in a single level".into(),
                            ));
                        }
                        let fresh = uniques.len() as i32;
                        uniques.push(value.clone());
                        level_map.insert(value.clone(), fresh);
                        fresh
                    }
                };
                level_codes[row_idx] = code;
            }
        }

        MultiIndex::new(unique_values, codes, names)
    }

    /// Returns the tuple stored at row `pos`, or `None` when `pos` is out of
    /// range or a code of that row does not resolve to a level value.
    pub fn get_tuple(&self, pos: usize) -> Option<Vec<T>> {
        if pos >= self.len() {
            return None;
        }
        self.levels
            .iter()
            .zip(&self.codes)
            .map(|(level, level_codes)| {
                let code = *level_codes.get(pos)?;
                if code < 0 {
                    return None;
                }
                level.get(code as usize).cloned()
            })
            .collect()
    }

    /// Returns the row position of `key`, or `None` when `key` does not have
    /// exactly one value per level or no row holds that tuple.
    pub fn get_loc(&self, key: &[T]) -> Option<usize> {
        if key.len() != self.levels.len() {
            return None;
        }
        // `Vec<T>` borrows as `[T]`, so the slice can be looked up without allocating.
        self.map.get(key).copied()
    }

    /// Number of rows in the index.
    pub fn len(&self) -> usize {
        self.codes.first().map_or(0, |first| first.len())
    }

    /// Returns `true` when the index has no rows.
    pub fn is_empty(&self) -> bool {
        self.len() == 0
    }

    /// Number of levels in the index.
    pub fn n_levels(&self) -> usize {
        self.levels.len()
    }

    /// Per-level value tables.
    pub fn levels(&self) -> &[Vec<T>] {
        &self.levels
    }

    /// Per-level row codes.
    pub fn codes(&self) -> &[Vec<i32>] {
        &self.codes
    }

    /// Per-level names (`None` for an unnamed level).
    pub fn names(&self) -> &[Option<String>] {
        &self.names
    }

    /// Materializes the values of one level, one entry per row, as an `Index`.
    ///
    /// # Errors
    ///
    /// Returns `PandRSError::Index` when `level` is out of range and
    /// `PandRSError::NotImplemented` when that level contains a `-1` code.
    /// Any error returned by `Index::new` is propagated.
    pub fn get_level_values(&self, level: usize) -> Result<Index<T>> {
        if level >= self.levels.len() {
            return Err(PandRSError::Index(format!(
                "Level {} is out of range. Valid levels are 0 to {}",
                level,
                self.levels.len().saturating_sub(1)
            )));
        }

        let level_values = &self.levels[level];
        let values = self.codes[level]
            .iter()
            .map(|&code| {
                if code == -1 {
                    Err(PandRSError::NotImplemented(
                        "Extracting level values with missing values is not supported".into(),
                    ))
                } else {
                    Ok(level_values[code as usize].clone())
                }
            })
            .collect::<Result<Vec<T>>>()?;

        Index::new(values)
    }

    /// Returns a new index with levels `i` and `j` exchanged (values, codes and names).
    ///
    /// # Errors
    ///
    /// Returns `PandRSError::Index` when either level is out of range; errors
    /// from rebuilding the index through [`MultiIndex::new`] are propagated.
    pub fn swaplevel(&self, i: usize, j: usize) -> Result<Self> {
        let n_levels = self.levels.len();
        if i >= n_levels || j >= n_levels {
            return Err(PandRSError::Index(format!(
                "Level indices are out of range. Valid levels are 0 to {}",
                n_levels.saturating_sub(1)
            )));
        }

        let mut new_levels = self.levels.clone();
        let mut new_codes = self.codes.clone();
        let mut new_names = self.names.clone();
        new_levels.swap(i, j);
        new_codes.swap(i, j);
        new_names.swap(i, j);

        // Rebuilding through `new` regenerates the tuple lookup map in the new level order.
        MultiIndex::new(new_levels, new_codes, Some(new_names))
    }

    /// Replaces the level names.
    ///
    /// # Errors
    ///
    /// Returns `PandRSError::Index` (leaving the names untouched) when `names`
    /// does not contain exactly one entry per level.
    pub fn set_names(&mut self, names: Vec<Option<String>>) -> Result<()> {
        if names.len() != self.levels.len() {
            return Err(PandRSError::Index(format!(
                "The number of names ({}) must match the number of levels ({})",
                names.len(),
                self.levels.len()
            )));
        }
        self.names = names;
        Ok(())
    }
}
/// Convenience alias for a `MultiIndex` whose level values are `String`s.
pub type StringMultiIndex = MultiIndex<String>;