use std::{cmp::Ordering, iter::repeat};
use opendp_derive::bootstrap;
use crate::{
core::{Function, StabilityMap, Transformation},
data::Column,
domains::{AtomDomain, VectorDomain},
error::Fallible,
metrics::SymmetricDistance,
traits::Hashable,
};
use super::{DataFrame, DataFrameDomain};
// Submodule `ffi`; compiled only when the `ffi` cargo feature is enabled.
#[cfg(feature = "ffi")]
mod ffi;
/// Returns a copy of `records` in which every record has exactly `len` fields.
///
/// * Records with fewer than `len` fields are right-padded with empty strings.
/// * Records with exactly `len` fields are copied unchanged.
/// * Records with more than `len` fields keep only their first `len` fields.
///
/// The order of the records is preserved and the input is not modified.
fn conform_records<'a>(len: usize, records: &[Vec<&'a str>]) -> Vec<Vec<&'a str>> {
    let mut conformed = Vec::with_capacity(records.len());
    for record in records {
        let row: Vec<&'a str> = match record.len().cmp(&len) {
            Ordering::Less => {
                // Too short: copy the existing fields, then fill the gap with "".
                let missing = len - record.len();
                let mut padded = Vec::with_capacity(len);
                padded.extend_from_slice(record);
                padded.extend(repeat("").take(missing));
                padded
            }
            Ordering::Equal => record.to_vec(),
            // Too long: drop everything past the first `len` fields.
            Ordering::Greater => record[..len].to_vec(),
        };
        conformed.push(row);
    }
    conformed
}
/// Assembles a `DataFrame` from row-oriented `records`, with one column per
/// entry of `col_names`.
///
/// Records are first conformed to `col_names.len()` fields (see
/// `conform_records`), so every positional lookup below is in bounds. The
/// `i`-th column name is paired with a `Column` holding the `i`-th field of
/// every record, converted to an owned `String`.
fn create_dataframe<K: Hashable>(col_names: Vec<K>, records: &[Vec<&str>]) -> DataFrame<K> {
    let rows = conform_records(col_names.len(), records);

    // Gathers the field at position `idx` from every row into one owned column.
    let column_at = |idx: usize| -> Vec<String> {
        rows.iter().map(|row| row[idx].to_string()).collect()
    };

    col_names
        .into_iter()
        .enumerate()
        .map(|(idx, name)| (name, Column::new(column_at(idx))))
        .collect()
}
#[bootstrap(features("contrib"))]
#[deprecated(note = "Use Polars instead.", since = "0.12.0")]
/// Make a Transformation that builds a dataframe from a `Vec<Vec<String>>` (a vector of records).
///
/// Each record is padded with empty strings or truncated so that it has one
/// entry per column name. Both the input and the output use `SymmetricDistance`,
/// and the stability map is the constant 1.
///
/// # Arguments
/// * `col_names` - Column names for each record entry.
///
/// # Generics
/// * `K` - hashable data type of the column names
pub fn make_create_dataframe<K>(
    col_names: Vec<K>,
) -> Fallible<
    Transformation<
        VectorDomain<VectorDomain<AtomDomain<String>>>,
        SymmetricDistance,
        DataFrameDomain<K>,
        SymmetricDistance,
    >,
>
where
    K: Hashable,
{
    let function = Function::new(move |records: &Vec<Vec<String>>| -> DataFrame<K> {
        // Borrow every field as `&str`; `create_dataframe` works on borrowed records.
        let borrowed: Vec<Vec<&str>> = records
            .iter()
            .map(|record| vec_string_to_str(record.as_slice()))
            .collect();
        // The closure may be invoked many times, so the names are cloned per call.
        create_dataframe(col_names.clone(), &borrowed)
    });

    Transformation::new(
        VectorDomain::new(VectorDomain::new(AtomDomain::default())),
        SymmetricDistance,
        DataFrameDomain::new(),
        SymmetricDistance,
        function,
        StabilityMap::new_from_constant(1),
    )
}
/// Parses delimited text into a `DataFrame` keyed by `col_names`.
///
/// `s` is split into lines, each line is split into fields on `separator`,
/// and the resulting records are loaded into a dataframe by
/// `create_dataframe`.
fn split_dataframe<K: Hashable>(separator: &str, col_names: Vec<K>, s: &str) -> DataFrame<K> {
    let lines = split_lines(s);
    let records = split_records(separator, &lines);
    // `create_dataframe` already conforms every record to `col_names.len()`
    // fields, so conforming here as well would only copy each record a
    // second time without changing the result.
    create_dataframe(col_names, &records)
}
#[bootstrap(
    features("contrib"),
    arguments(separator(c_type = "char *", rust_type = b"null"))
)]
#[deprecated(note = "Use Polars instead.", since = "0.12.0")]
/// Make a Transformation that splits a String into records and loads the
/// resulting table into a dataframe keyed by `col_names`.
///
/// Both the input and the output use `SymmetricDistance`, and the stability
/// map is the constant 1.
///
/// # Arguments
/// * `separator` - The token(s) that separate entries in each record. Defaults to a comma when not provided.
/// * `col_names` - Column names for each record entry.
///
/// # Generics
/// * `K` - hashable data type of the column names
pub fn make_split_dataframe<K>(
    separator: Option<&str>,
    col_names: Vec<K>,
) -> Fallible<
    Transformation<AtomDomain<String>, SymmetricDistance, DataFrameDomain<K>, SymmetricDistance>,
>
where
    K: Hashable,
{
    // Take ownership of the separator so the closure below can hold on to it.
    let separator = String::from(separator.unwrap_or(","));

    let function = Function::new(move |text: &String| {
        split_dataframe(&separator, col_names.clone(), text)
    });

    Transformation::new(
        AtomDomain::default(),
        SymmetricDistance,
        DataFrameDomain::new(),
        SymmetricDistance,
        function,
        StabilityMap::new_from_constant(1),
    )
}
/// Borrows every `String` in `src` as a `&str`, preserving order.
fn vec_string_to_str(src: &[String]) -> Vec<&str> {
    src.iter().map(String::as_str).collect()
}
/// Converts every `&str` in `src` into an owned `String`, preserving order.
fn vec_str_to_string(src: Vec<&str>) -> Vec<String> {
    src.into_iter().map(String::from).collect()
}
/// Splits `s` into its lines using `str::lines`, borrowing each line from `s`.
///
/// Line terminators are not part of the returned slices.
fn split_lines(s: &str) -> Vec<&str> {
    let mut lines = Vec::new();
    lines.extend(s.lines());
    lines
}
#[bootstrap(features("contrib"))]
/// Make a Transformation that takes a string and splits it into a `Vec<String>` of its lines.
///
/// Both the input and the output use `SymmetricDistance`, and the stability
/// map is the constant 1.
pub fn make_split_lines() -> Fallible<
    Transformation<
        AtomDomain<String>,
        SymmetricDistance,
        VectorDomain<AtomDomain<String>>,
        SymmetricDistance,
    >,
> {
    // Each line is copied into its own `String` so the output owns its data.
    let function = Function::new(|text: &String| -> Vec<String> {
        text.lines().map(String::from).collect()
    });

    Transformation::new(
        AtomDomain::<String>::default(),
        SymmetricDistance,
        VectorDomain::new(AtomDomain::default()),
        SymmetricDistance,
        function,
        StabilityMap::new_from_constant(1),
    )
}
/// Splits every line into fields on `separator`.
///
/// Each field has its leading and trailing whitespace trimmed and borrows
/// from the original line. One record is produced per input line, in order.
fn split_records<'a>(separator: &str, lines: &[&'a str]) -> Vec<Vec<&'a str>> {
    lines
        .iter()
        .map(|&line| line.split(separator).map(str::trim).collect())
        .collect()
}
#[bootstrap(
    features("contrib"),
    arguments(separator(c_type = "char *", rust_type = b"null"))
)]
/// Make a Transformation that splits each record in a `Vec<String>` into a `Vec<Vec<String>>`.
///
/// Every entry is trimmed of surrounding whitespace. Both the input and the
/// output use `SymmetricDistance`, and the stability map is the constant 1.
///
/// # Arguments
/// * `separator` - The token(s) that separate entries in each record. Defaults to a comma when not provided.
pub fn make_split_records(
    separator: Option<&str>,
) -> Fallible<
    Transformation<
        VectorDomain<AtomDomain<String>>,
        SymmetricDistance,
        VectorDomain<VectorDomain<AtomDomain<String>>>,
        SymmetricDistance,
    >,
> {
    // Take ownership of the separator so the closure below can hold on to it.
    let separator = String::from(separator.unwrap_or(","));

    let function = Function::new(move |lines: &Vec<String>| -> Vec<Vec<String>> {
        // Split on borrowed data first, then copy each field into an owned `String`.
        let borrowed = vec_string_to_str(lines);
        split_records(&separator, &borrowed)
            .into_iter()
            .map(vec_str_to_string)
            .collect()
    });

    Transformation::new(
        VectorDomain::new(AtomDomain::default()),
        SymmetricDistance,
        VectorDomain::new(VectorDomain::new(AtomDomain::default())),
        SymmetricDistance,
        function,
        StabilityMap::new_from_constant(1),
    )
}
// Submodule `test`; compiled only for test builds.
#[cfg(test)]
mod test;