use pandrs::error::Result;
use pandrs::series::{CategoricalOrder, StringCategorical};
use pandrs::{DataFrame, Series, NA};
use std::path::Path;
/// Walks through categorical data that contains missing (`NA`) values.
///
/// The example is split into five printed sections:
/// 1. building an unordered categorical from values that include `NA`,
/// 2. building an ordered categorical with an explicit category list,
/// 3. set operations (union / intersection / difference) on two categoricals,
/// 4. using a categorical as a `DataFrame` column next to a numeric column,
/// 5. writing the `DataFrame` to CSV and loading it back.
///
/// # Errors
///
/// Any error returned by the `pandrs` calls (categorical construction, set
/// operations, `DataFrame` building, CSV I/O, column lookup) is propagated to
/// the caller with `?`.
// NOTE(review): the lint is presumably silenced because the crate's error type
// is large and `main` returns the crate-wide `Result` alias — confirm.
#[allow(clippy::result_large_err)]
fn main() -> Result<()> {
    println!("=== Example of Categorical Data with Missing Values ===\n");

    // ---- 1. Unordered categorical, categories taken from the data ----
    println!("1. Create categorical data");
    let values = vec![
        NA::Value("Red".to_string()),
        NA::Value("Blue".to_string()),
        NA::NA,
        NA::Value("Green".to_string()),
        NA::Value("Red".to_string()),
    ];
    // `values` is not needed afterwards, so it is moved instead of cloned.
    let cat = StringCategorical::from_na_vec(values, None, Some(CategoricalOrder::Unordered))?;
    println!("Categories: {:?}", cat.categories());
    println!("Number of categories: {}", cat.categories().len());
    println!("Number of data: {}", cat.len());
    println!("Internal codes: {:?}", cat.codes());
    println!();

    // ---- 2. Ordered categorical with an explicit category list ----
    println!("2. Create ordered categorical data");
    let ordered_categories = vec!["Low".to_string(), "Medium".to_string(), "High".to_string()];
    let values = vec![
        NA::Value("Medium".to_string()),
        NA::Value("Low".to_string()),
        NA::NA,
        NA::Value("High".to_string()),
        NA::Value("Medium".to_string()),
    ];
    let ordered_cat = StringCategorical::from_na_vec(
        values,
        Some(ordered_categories),
        Some(CategoricalOrder::Ordered),
    )?;
    println!("Ordered categories: {:?}", ordered_cat.categories());
    println!("Number of categories: {}", ordered_cat.categories().len());
    println!("Number of data: {}", ordered_cat.len());
    println!("Internal codes: {:?}", ordered_cat.codes());
    println!();

    // ---- 3. Set operations between two categoricals ----
    println!("3. Operations on categorical data");
    let values1 = vec![
        NA::Value("A".to_string()),
        NA::Value("B".to_string()),
        NA::NA,
        NA::Value("C".to_string()),
    ];
    let values2 = vec![
        NA::Value("B".to_string()),
        NA::Value("C".to_string()),
        NA::Value("D".to_string()),
        NA::NA,
    ];
    let cat1 = StringCategorical::from_na_vec(values1, None, None)?;
    let cat2 = StringCategorical::from_na_vec(values2, None, None)?;
    let union = cat1.union(&cat2)?;
    let intersection = cat1.intersection(&cat2)?;
    let difference = cat1.difference(&cat2)?;
    println!("Categories of set 1: {:?}", cat1.categories());
    println!("Categories of set 2: {:?}", cat2.categories());
    println!("Union: {:?}", union.categories());
    println!("Intersection: {:?}", intersection.categories());
    println!("Difference (set 1 - set 2): {:?}", difference.categories());
    println!();

    // ---- 4. Categorical column inside a DataFrame ----
    println!("4. Using categorical columns in DataFrame");
    let values = vec![
        NA::Value("High".to_string()),
        NA::Value("Medium".to_string()),
        NA::NA,
        NA::Value("Low".to_string()),
    ];
    let order_cats = vec!["Low".to_string(), "Medium".to_string(), "High".to_string()];
    let cat_eval = StringCategorical::from_na_vec(
        values,
        Some(order_cats),
        Some(CategoricalOrder::Ordered),
    )?;
    println!("Size of created categorical data: {}", cat_eval.len());
    let categoricals = vec![("Evaluation".to_string(), cat_eval)];
    let mut df = DataFrame::from_categoricals(categoricals)?;
    println!("Number of rows in DataFrame: {}", df.row_count());
    println!("Note: NA values are excluded when creating DataFrame");
    // Three scores for four input values: presumably sized to the rows left
    // after the NA entry is dropped, as the note above states.
    let scores = vec![95, 80, 0];
    println!("Size of scores: {}", scores.len());
    df.add_column(
        "Score".to_string(),
        Series::new(scores, Some("Score".to_string()))?,
    )?;
    println!("DataFrame: ");
    println!("{df:#?}");
    println!(
        "Is 'Evaluation' column categorical: {}",
        df.is_categorical("Evaluation")
    );
    match df.get_categorical::<String>("Evaluation") {
        Ok(cat_col) => println!(
            "Categories of 'Evaluation' column: {:?}",
            cat_col.categories()
        ),
        Err(_) => println!("Failed to retrieve categories of 'Evaluation' column"),
    }
    println!();

    // ---- 5. CSV round trip ----
    println!("5. Input and output with CSV file");
    // Use the platform's temporary directory instead of a hard-coded `/tmp`,
    // which does not exist on every operating system.
    let temp_file = std::env::temp_dir().join("categorical_example.csv");
    let temp_path: &Path = temp_file.as_path();
    df.to_csv(temp_path)?;
    println!("Saved to CSV file: {}", temp_path.display());
    let df_loaded = DataFrame::from_csv(temp_path, true)?;
    println!("Data loaded from CSV:");
    println!("{df_loaded:#?}");
    println!("Example of data format loaded from CSV:");
    // Propagate a failed column lookup instead of panicking, and do not index
    // into a column that might be empty.
    let evaluation = df_loaded.get_column::<String>("Evaluation")?;
    match evaluation.values().first() {
        Some(first) => println!("First value of 'Evaluation' column: {first:?}"),
        None => println!("'Evaluation' column loaded from CSV is empty"),
    }

    // Build a categorical from scratch for comparison with the loaded data.
    let new_values = vec![
        NA::Value("High".to_string()),
        NA::Value("Medium".to_string()),
        NA::NA,
        NA::Value("Low".to_string()),
    ];
    let new_cat =
        StringCategorical::from_na_vec(new_values, None, Some(CategoricalOrder::Ordered))?;
    println!("Example of newly created categorical data:");
    println!("Categories: {:?}", new_cat.categories());
    println!("Order: {:?}", new_cat.ordered());
    println!("\nTo actually convert data loaded from CSV to categorical data,");
    println!("parsing processing according to the format and string escaping method of the CSV is required.");
    println!("\n=== Sample End ===");
    Ok(())
}