nu_plugin_polars 0.113.0

Nushell dataframe plugin commands based on polars.
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
use nu_protocol::shell_error::generic::GenericError;
use nu_protocol::{Category, Example, PipelineData, ShellError, Signature, SyntaxShape};
use polars::prelude::PlSmallStr;

use crate::{
    PolarsPlugin,
    values::{CustomValueSupport, NuDataFrame, PolarsPluginType},
};

pub struct CutSeries;

impl PluginCommand for CutSeries {
    type Plugin = PolarsPlugin;
    fn name(&self) -> &str {
        "polars cut"
    }

    fn description(&self) -> &str {
        "Bin continuous values into discrete categories for a series."
    }

    fn signature(&self) -> nu_protocol::Signature {
        Signature::build(self.name())
            .required("breaks", SyntaxShape::Any, "Dataframe that contains a series of unique cut points.")
            .named(
                "labels",
                SyntaxShape::List(Box::new(SyntaxShape::String)),
                "Names of the categories. The number of labels must be equal to the number of cut points plus one.",
                Some('l'),
            )
            .switch("left_closed", "Set the intervals to be left-closed instead of right-closed.", Some('c'))
            .switch("include_breaks", "Include a column with the right endpoint of the bin each observation falls in. This will change the data type of the output from a Categorical to a Struct.", Some('b'))
            .input_output_types(vec![
                (
                    PolarsPluginType::NuDataFrame.into(),
                    PolarsPluginType::NuDataFrame.into(),
                ),
                (
                    PolarsPluginType::NuLazyFrame.into(),
                    PolarsPluginType::NuLazyFrame.into(),
                ),
            ])
            .category(Category::Custom("dataframe".into()))
    }

    fn examples(&self) -> Vec<Example<'_>> {
        vec![Example {
            description: "Divide the column into three categories.",
            example: r#"[-2, -1, 0, 1, 2] | polars into-df | polars cut [-1, 1] --labels ["a", "b", "c"]"#,
            result: None,
        }]
    }

    fn run(
        &self,
        plugin: &Self::Plugin,
        engine: &EngineInterface,
        call: &EvaluatedCall,
        mut input: PipelineData,
    ) -> Result<PipelineData, nu_protocol::LabeledError> {
        let metadata = input.take_metadata();
        command(plugin, engine, call, input)
            .map_err(|e| e.into())
            .map(|pd| pd.set_metadata(metadata))
    }
}

fn command(
    plugin: &PolarsPlugin,
    engine: &EngineInterface,
    call: &EvaluatedCall,
    input: PipelineData,
) -> Result<PipelineData, ShellError> {
    let df = NuDataFrame::try_from_pipeline_coerce(plugin, input, call.head)?;
    let series = df.as_series(call.head)?;

    let breaks = call.req::<Vec<f64>>(0)?;

    let labels: Option<Vec<PlSmallStr>> = call.get_flag::<Vec<String>>("labels")?.map(|l| {
        l.into_iter()
            .map(PlSmallStr::from)
            .collect::<Vec<PlSmallStr>>()
    });

    let left_closed = call.has_flag("left_closed")?;
    let include_breaks = call.has_flag("include_breaks")?;

    let new_series = polars_ops::series::cut(&series, breaks, labels, left_closed, include_breaks)
        .map_err(|e| {
            ShellError::Generic(GenericError::new(
                "Error cutting series",
                e.to_string(),
                call.head,
            ))
        })?;

    NuDataFrame::try_from_series(new_series, call.head)?.to_pipeline_data(plugin, engine, call.head)
}