prx 0.5.9

Praxis — agent-native Unix tools. A single binary that replaces grep, cat, find, sed, and diff for AI coding agents.
/// Provides `boost_file_coherence`, `boost_definitions` and `boost_stem_matches`,
/// the boosting stages invoked by `rerank_with_config`.
pub mod boosting;
/// Provides `apply_noise_penalties` and `apply_saturation_decay`, invoked by
/// `rerank_with_config`.
pub mod penalties;
/// Provides `boost_proximity`, the stage that consumes the optional `ImportGraph`.
pub mod proximity;

use std::collections::HashMap;

use crate::search::graph::ImportGraph;

/// Reranks `scores` using [`RerankConfig::default`].
///
/// Convenience wrapper around [`rerank_with_config`]: all arguments are
/// forwarded unchanged and the result of that call is returned as-is.
pub fn rerank(
    scores: &mut HashMap<usize, f32>,
    chunk_texts: &[String],
    file_paths: &[String],
    query: &str,
    top_k: usize,
    graph: Option<&ImportGraph>,
) -> Vec<(usize, f32)> {
    let default_config = RerankConfig::default();
    rerank_with_config(
        scores,
        chunk_texts,
        file_paths,
        query,
        top_k,
        graph,
        &default_config,
    )
}

/// Switches for the individual stages of the rerank pipeline.
///
/// Each flag gates exactly one step of [`rerank_with_config`]; a stage whose
/// flag is `false` is skipped entirely.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct RerankConfig {
    /// Run `boosting::boost_file_coherence`.
    pub file_coherence: bool,
    /// Run `boosting::boost_definitions`.
    pub definitions: bool,
    /// Run `boosting::boost_stem_matches`.
    pub stem_matches: bool,
    /// Run `proximity::boost_proximity`. Only takes effect when an import
    /// graph is supplied to the rerank call.
    pub proximity: bool,
    /// Run `penalties::apply_noise_penalties`.
    pub noise_penalties: bool,
    /// Hand the sorted candidates to `penalties::apply_saturation_decay`.
    /// When `false`, the sorted list is simply truncated to `top_k`.
    pub saturation_decay: bool,
}

impl Default for RerankConfig {
    fn default() -> Self {
        Self {
            file_coherence: true,
            definitions: true,
            stem_matches: true,
            proximity: true,
            noise_penalties: true,
            saturation_decay: true,
        }
    }
}

/// Runs the rerank stages enabled in `config` and returns the ranked hits.
///
/// Stages run in a fixed order, each handed `scores` mutably: file coherence,
/// definitions, stem matches, import-graph proximity, then noise penalties.
/// The resulting `(id, score)` pairs are sorted best-first and either passed
/// to `penalties::apply_saturation_decay` or truncated to `top_k`.
///
/// # Arguments
/// * `scores` - candidate scores keyed by id; updated in place by the stages.
///   NOTE(review): ids presumably index `chunk_texts` / `file_paths` — confirm
///   against the stage implementations.
/// * `chunk_texts` - chunk contents, forwarded to the definitions boost.
/// * `file_paths` - file paths, forwarded to every stage that takes paths.
/// * `query` - the search query, forwarded to the definition and stem boosts.
/// * `top_k` - maximum number of results when saturation decay is disabled;
///   otherwise forwarded to `apply_saturation_decay`.
/// * `graph` - optional import graph; the proximity stage is skipped when
///   this is `None`, even if `config.proximity` is set.
/// * `config` - selects which stages run.
///
/// # Ordering
/// Results are ordered by descending score. NaN scores sort after every real
/// score, and equal scores are ordered by ascending id, so the output does not
/// depend on `HashMap` iteration order.
pub fn rerank_with_config(
    scores: &mut HashMap<usize, f32>,
    chunk_texts: &[String],
    file_paths: &[String],
    query: &str,
    top_k: usize,
    graph: Option<&ImportGraph>,
    config: &RerankConfig,
) -> Vec<(usize, f32)> {
    if config.file_coherence {
        boosting::boost_file_coherence(scores, file_paths);
    }
    if config.definitions {
        boosting::boost_definitions(scores, chunk_texts, file_paths, query);
    }
    if config.stem_matches {
        boosting::boost_stem_matches(scores, file_paths, query);
    }
    if config.proximity {
        if let Some(g) = graph {
            proximity::boost_proximity(scores, file_paths, g);
        }
    }
    if config.noise_penalties {
        penalties::apply_noise_penalties(scores, file_paths);
    }

    let mut ranked: Vec<(usize, f32)> = scores.iter().map(|(&id, &score)| (id, score)).collect();
    // The comparator must be a total order: `partial_cmp` is `None` only when a
    // NaN is involved, in which case the NaN side sorts last. The id tie-break
    // makes every pair distinct (ids are unique map keys), so an unstable sort
    // is safe and the result is deterministic.
    ranked.sort_unstable_by(|a, b| {
        b.1.partial_cmp(&a.1)
            .unwrap_or_else(|| a.1.is_nan().cmp(&b.1.is_nan()))
            .then_with(|| a.0.cmp(&b.0))
    });

    if config.saturation_decay {
        penalties::apply_saturation_decay(&ranked, file_paths, top_k)
    } else {
        ranked.truncate(top_k);
        ranked
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Turns borrowed literals into the owned strings the rerank API expects.
    fn owned(items: &[&str]) -> Vec<String> {
        items.iter().map(|item| String::from(*item)).collect()
    }

    /// Default pipeline on a three-chunk corpus: the chunk that defines the
    /// queried symbol must end up first.
    #[test]
    fn full_pipeline_runs() {
        let chunk_texts = owned(&[
            "fn authenticate(user: &User) -> Token { }",
            "let result = authenticate(current_user);",
            "fn process(data: &[u8]) -> Result",
        ]);
        let file_paths = owned(&["src/auth.rs", "src/handler.rs", "tests/test_process.py"]);
        let mut scores: HashMap<usize, f32> = HashMap::from([(0, 1.0), (1, 0.8), (2, 0.5)]);

        let ranked = rerank(&mut scores, &chunk_texts, &file_paths, "authenticate", 3, None);

        assert!(!ranked.is_empty());
        let (top_id, _) = ranked[0];
        assert_eq!(top_id, 0, "definition chunk should rank first");
    }

    /// With identical starting scores, the chunk from the source file must
    /// outrank the chunk from the test file.
    #[test]
    fn test_file_ranks_last() {
        let chunk_texts = owned(&["fn auth() {}", "fn test_auth() {}"]);
        let file_paths = owned(&["src/auth.rs", "tests/test_auth.py"]);
        let mut scores: HashMap<usize, f32> = HashMap::from([(0, 1.0), (1, 1.0)]);

        let ranked = rerank(&mut scores, &chunk_texts, &file_paths, "auth", 2, None);

        let (top_id, _) = ranked[0];
        assert_eq!(top_id, 0, "source file should rank above test file");
    }

    /// An empty candidate set must yield an empty result without panicking.
    #[test]
    fn empty_scores_no_panic() {
        let mut scores: HashMap<usize, f32> = HashMap::new();
        let nothing: &[String] = &[];

        let ranked = rerank(&mut scores, nothing, nothing, "test", 5, None);

        assert!(ranked.is_empty());
    }
}