x-pipe-rs 0.1.0

Composable recommendation/feed pipeline framework built on comp-cat-rs
Documentation
//! Candidate selection: choosing final results.
//!
//! A [`Selector`] chooses candidates from scored results,
//! applying deduplication, diversity enforcement, and budget
//! limits.  [`TopNSelector`] is the default implementation
//! that sorts by score descending and takes the top N.

use std::cmp::Ordering;
use std::collections::BinaryHeap;

use comp_cat_rs::effect::io::Io;

use crate::score::ScoredCandidate;
use crate::stage::Stage;

/// Maximum number of items to return.
#[derive(Debug, Clone, Copy)]
pub struct MaxItems(usize);

impl MaxItems {
    /// Create a new max-items limit.
    #[must_use]
    pub fn new(n: usize) -> Self {
        Self(n)
    }

    /// The limit value.
    #[must_use]
    pub fn value(self) -> usize {
        self.0
    }
}

/// Budget configuration for selection.
#[derive(Debug, Clone, Copy)]
pub struct Budget {
    max_items: MaxItems,
}

impl Budget {
    /// Create a budget with a maximum item count.
    #[must_use]
    pub fn new(max_items: MaxItems) -> Self {
        Self { max_items }
    }

    /// The maximum number of items.
    #[must_use]
    pub fn max_items(&self) -> MaxItems {
        self.max_items
    }
}

/// Selects final candidates from scored results.
///
/// Handles deduplication, diversity enforcement, and budget limits.
pub trait Selector<I, E> {
    /// Select from scored candidates, respecting the budget.
    fn select(
        &self,
        candidates: Vec<ScoredCandidate<I>>,
        budget: &Budget,
    ) -> Io<E, Vec<ScoredCandidate<I>>>;
}

/// Convert a [`Selector`] into a [`Stage`].
///
/// The budget is captured by the returned stage.
pub fn selector_stage<S, I, E>(
    selector: S,
    budget: Budget,
) -> Stage<E, Vec<ScoredCandidate<I>>, Vec<ScoredCandidate<I>>>
where
    S: Selector<I, E> + Send + 'static,
    I: Send + 'static,
    E: Send + 'static,
{
    Stage::new(move |candidates| selector.select(candidates, &budget))
}

/// Default selector: sort by score descending, take top N.
///
/// # Examples
///
/// ```
/// use x_pipe_rs::{Score, ScoredCandidate, Budget, MaxItems};
/// use x_pipe_rs::selector::{TopNSelector, Selector};
///
/// # fn main() -> Result<(), Box<dyn std::error::Error>> {
/// let candidates = vec![
///     ScoredCandidate::new("c", Score::new(1.0).ok_or("nan")?),
///     ScoredCandidate::new("a", Score::new(3.0).ok_or("nan")?),
///     ScoredCandidate::new("b", Score::new(2.0).ok_or("nan")?),
/// ];
/// let budget = Budget::new(MaxItems::new(2));
/// let io: comp_cat_rs::effect::io::Io<std::convert::Infallible, Vec<ScoredCandidate<&str>>> =
///     TopNSelector.select(candidates, &budget);
/// let result = io.run().map_err(|e| match e {})?;
/// assert_eq!(result.len(), 2);
/// assert_eq!(*result[0].item(), "a");
/// assert_eq!(*result[1].item(), "b");
/// # Ok(())
/// # }
/// ```
pub struct TopNSelector;

impl<I: Send + 'static, E: Send + 'static> Selector<I, E> for TopNSelector {
    fn select(
        &self,
        candidates: Vec<ScoredCandidate<I>>,
        budget: &Budget,
    ) -> Io<E, Vec<ScoredCandidate<I>>> {
        let n = budget.max_items().value();
        Io::pure(top_n_by_score(candidates, n))
    }
}

/// Wrapper for ordering [`ScoredCandidate`] by score in a [`BinaryHeap`].
struct ByScore<I>(ScoredCandidate<I>);

impl<I> Eq for ByScore<I> {}

impl<I> PartialEq for ByScore<I> {
    fn eq(&self, other: &Self) -> bool {
        self.0.score() == other.0.score()
    }
}

impl<I> PartialOrd for ByScore<I> {
    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
        Some(self.cmp(other))
    }
}

impl<I> Ord for ByScore<I> {
    fn cmp(&self, other: &Self) -> Ordering {
        self.0.score().cmp(&other.0.score())
    }
}

/// Sort scored candidates by score descending and take the top `n`.
///
/// Uses [`BinaryHeap`] for a functional, mutation-free O(n log n) sort.
fn top_n_by_score<I>(candidates: Vec<ScoredCandidate<I>>, n: usize) -> Vec<ScoredCandidate<I>> {
    let heap: BinaryHeap<ByScore<I>> = candidates.into_iter().map(ByScore).collect();
    // into_sorted_vec returns ascending order; rev gives descending
    heap.into_sorted_vec()
        .into_iter()
        .rev()
        .take(n)
        .map(|by| by.0)
        .collect()
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::score::Score;

    fn score(v: f64) -> Result<Score, &'static str> {
        Score::new(v).ok_or("test score was NaN")
    }

    #[test]
    fn top_n_selects_highest_scores() -> Result<(), &'static str> {
        let candidates = vec![
            ScoredCandidate::new("c", score(1.0)?),
            ScoredCandidate::new("a", score(3.0)?),
            ScoredCandidate::new("b", score(2.0)?),
        ];
        let result = top_n_by_score(candidates, 2);
        assert_eq!(result.len(), 2);
        assert_eq!(*result[0].item(), "a");
        assert_eq!(*result[1].item(), "b");
        Ok(())
    }

    #[test]
    fn top_n_with_n_greater_than_len_returns_all() -> Result<(), &'static str> {
        let candidates = vec![
            ScoredCandidate::new("a", score(1.0)?),
            ScoredCandidate::new("b", score(2.0)?),
        ];
        let result = top_n_by_score(candidates, 10);
        assert_eq!(result.len(), 2);
        Ok(())
    }

    #[test]
    fn top_n_with_zero_returns_empty() -> Result<(), &'static str> {
        let candidates = vec![
            ScoredCandidate::new("a", score(1.0)?),
        ];
        let result = top_n_by_score(candidates, 0);
        assert!(result.is_empty());
        Ok(())
    }

    #[test]
    fn top_n_empty_input() {
        let candidates: Vec<ScoredCandidate<&str>> = Vec::new();
        let result = top_n_by_score(candidates, 5);
        assert!(result.is_empty());
    }
}