use numpy::{PyArray1, PyReadonlyArray1};
use pyo3::prelude::*;
use crate::encoding::{EncodingMap, OrderedTargetEncoder};
/// Python-facing wrapper exported under the class name `OrderedTargetEncoder`.
///
/// Thin shell around the crate-internal `crate::encoding::OrderedTargetEncoder`;
/// the wrapper itself stores nothing besides that encoder.
#[pyclass(name = "OrderedTargetEncoder")]
pub struct PyOrderedTargetEncoder {
// Core encoder instance; private, so Python only reaches it through the exposed methods.
inner: OrderedTargetEncoder,
}
#[pymethods]
impl PyOrderedTargetEncoder {
#[new]
#[pyo3(signature = (smoothing=10.0))]
fn new(smoothing: f64) -> PyResult<Self> {
if smoothing < 0.0 {
return Err(pyo3::exceptions::PyValueError::new_err(
"smoothing must be non-negative",
));
}
Ok(Self {
inner: OrderedTargetEncoder::new(smoothing),
})
}
fn reset(&mut self) {
self.inner.reset();
}
fn encode_and_update(&mut self, category: &str, target: f64) -> f64 {
self.inner.encode_and_update(category, target)
}
fn encode_column<'py>(
&mut self,
py: Python<'py>,
categories: Vec<String>,
targets: PyReadonlyArray1<'py, f64>,
) -> PyResult<Bound<'py, PyArray1<f64>>> {
let targets_arr = targets.as_array();
if categories.len() != targets_arr.len() {
return Err(pyo3::exceptions::PyValueError::new_err(format!(
"categories length {} doesn't match targets length {}",
categories.len(),
targets_arr.len()
)));
}
let targets_vec: Vec<f64> = targets_arr.to_vec();
let encoded = self.inner.encode_column(&categories, &targets_vec);
Ok(PyArray1::from_vec(py, encoded))
}
fn encode_inference(&self, category: &str) -> f64 {
self.inner.encode_inference(category)
}
fn encode_inference_batch<'py>(
&self,
py: Python<'py>,
categories: Vec<String>,
) -> Bound<'py, PyArray1<f64>> {
let encoded: Vec<f64> = categories
.iter()
.map(|c| self.inner.encode_inference(c))
.collect();
PyArray1::from_vec(py, encoded)
}
fn get_encoding_map(&self) -> PyEncodingMap {
PyEncodingMap {
inner: self.inner.get_encoding_map(),
}
}
fn __repr__(&self) -> String {
"OrderedTargetEncoder()".to_string()
}
}
/// Python-facing wrapper exported under the class name `EncodingMap`.
///
/// Wraps a crate-internal `crate::encoding::EncodingMap` value. `Clone` is
/// derived, so the wrapper (and the map it holds) can be duplicated by value.
#[pyclass(name = "EncodingMap")]
#[derive(Clone)]
pub struct PyEncodingMap {
// Wrapped map; the methods and getters on this class read from it.
inner: EncodingMap,
}
#[pymethods]
impl PyEncodingMap {
    /// Encode a single `category` by delegating to the wrapped map's `encode`.
    fn encode(&self, category: &str) -> f64 {
        let map = &self.inner;
        map.encode(category)
    }

    /// Encode every entry of `categories` with the wrapped map's `encode_batch`.
    ///
    /// The values are returned as a new 1-D float64 NumPy array.
    fn encode_batch<'py>(
        &self,
        py: Python<'py>,
        categories: Vec<String>,
    ) -> Bound<'py, PyArray1<f64>> {
        PyArray1::from_vec(py, self.inner.encode_batch(&categories))
    }

    /// Read-only property exposing the map's `default_value` field.
    #[getter]
    fn default_value(&self) -> f64 {
        let map = &self.inner;
        map.default_value
    }

    /// Read-only property exposing the map's `smoothing` field.
    #[getter]
    fn smoothing(&self) -> f64 {
        let map = &self.inner;
        map.smoothing
    }

    /// Read-only property: number of entries stored in the map's `encodings`.
    #[getter]
    fn num_categories(&self) -> usize {
        let entries = &self.inner.encodings;
        entries.len()
    }

    /// Return a copy of all `(category, value)` entries in stored order.
    fn items(&self) -> Vec<(String, f64)> {
        self.inner.encodings.iter().cloned().collect()
    }

    /// Summary string with the entry count and the default value (4 decimals).
    fn __repr__(&self) -> String {
        let count = self.inner.encodings.len();
        let default = self.inner.default_value;
        format!(
            "EncodingMap(num_categories={}, default={:.4})",
            count, default
        )
    }

    /// Support Python's `len()`: number of entries stored in `encodings`.
    fn __len__(&self) -> usize {
        let entries = &self.inner.encodings;
        entries.len()
    }
}
impl From<EncodingMap> for PyEncodingMap {
    /// Wrap a core `EncodingMap` in its Python-facing shell without altering it.
    fn from(inner: EncodingMap) -> Self {
        Self { inner }
    }
}
/// Add the `OrderedTargetEncoder` and `EncodingMap` classes to module `m`.
///
/// # Errors
///
/// Returns the first error reported by `add_class`; if the first registration
/// fails, the second class is not attempted.
pub fn register(m: &Bound<'_, PyModule>) -> PyResult<()> {
    m.add_class::<PyOrderedTargetEncoder>()
        .and_then(|()| m.add_class::<PyEncodingMap>())
}