lean-ctx 3.1.3

Context Runtime for AI Agents with CCP. 42 MCP tools, 10 read modes, 90+ compression patterns, cross-session memory (CCP), persistent AI knowledge with temporal facts + contradiction detection, multi-agent context sharing + diaries, LITM-aware positioning, AAAK compact format, adaptive compression with Thompson Sampling bandits. Supports 24 AI tools. Reduces LLM token consumption by up to 99%.
Documentation
//! Neural line importance scorer using ONNX inference via rten.
//!
//! Replaces the heuristic IB-Filter with a trained model that predicts
//! per-line importance based on structural features.
//!
//! When no ONNX model is available, falls back to the decision-tree
//! implementation (static rules generated by distill.py).

use std::path::Path;

#[cfg(feature = "neural")]
use rten::Model;

/// Scores per-line importance, backed either by a trained ONNX model
/// (when compiled with `--features neural`) or by a static decision-tree
/// fallback distilled from that model.
pub struct NeuralLineScorer {
    /// Loaded ONNX model used for inference (neural builds only).
    #[cfg(feature = "neural")]
    model: Model,
    /// Placeholder field for non-neural builds; the struct carries no state
    /// because the fallback scorer is purely rule-based.
    #[cfg(not(feature = "neural"))]
    _phantom: (),
}

/// Per-line feature vector fed to the importance model.
///
/// All fields are `f64` because they are flattened into a single numeric
/// input tensor (see `to_array`); boolean predicates are encoded as 1.0/0.0.
#[derive(Debug, Clone)]
pub struct LineFeatures {
    /// Length of the trimmed line, in bytes.
    pub line_length: f64,
    /// Leading whitespace divided by 4 (assumes 4-space indent units).
    pub indentation_level: f64,
    /// Unique tokens / total tokens for the line (0.0 if the line is empty).
    pub token_diversity: f64,
    /// 1.0 if the line starts a definition (fn/class/struct/…), else 0.0.
    pub is_definition: f64,
    /// 1.0 if the line is an import/use/include statement, else 0.0.
    pub is_import: f64,
    /// 1.0 if the line looks like a comment, else 0.0.
    pub is_comment: f64,
    /// 1.0 if the line is purely a closing delimiter (`}`, `);`, `end`, …).
    pub is_closing: f64,
    /// Fraction of tokens that are language keywords.
    pub keyword_density: f64,
    /// Line position within the file, normalized to 0.0..=1.0.
    pub position_normalized: f64,
    /// 1.0 if the line appears to carry a type annotation, else 0.0.
    pub has_type_annotation: f64,
    /// Brace-nesting depth at this line, as tracked by the caller.
    pub nesting_depth: f64,
    /// Category code of the previous line (see `classify_type`).
    pub prev_line_type: f64,
    /// Category code of the next line (see `classify_type`).
    pub next_line_type: f64,
}

impl LineFeatures {
    /// Extracts the feature vector for a single source line.
    ///
    /// `position` is the line's normalized position in the file (0.0..=1.0);
    /// `context` carries neighbourhood information computed by the caller
    /// (nesting depth and neighbouring line categories).
    pub fn from_line(line: &str, position: f64, context: &LineContext) -> Self {
        let trimmed = line.trim();
        // Leading whitespace width in bytes (a tab counts as one byte).
        let leading = (line.len() - line.trim_start().len()) as f64;

        Self {
            line_length: trimmed.len() as f64,
            // Assumes 4-space indentation units.
            indentation_level: leading / 4.0,
            token_diversity: Self::compute_token_diversity(trimmed),
            is_definition: Self::flag(Self::check_definition(trimmed)),
            is_import: Self::flag(Self::check_import(trimmed)),
            is_comment: Self::flag(Self::check_comment(trimmed)),
            is_closing: Self::flag(Self::check_closing(trimmed)),
            keyword_density: Self::compute_keyword_density(trimmed),
            position_normalized: position,
            has_type_annotation: Self::flag(Self::check_type_annotation(trimmed)),
            nesting_depth: context.nesting_depth as f64,
            prev_line_type: context.prev_line_type as f64,
            next_line_type: context.next_line_type as f64,
        }
    }

    /// Flattens the features into the fixed 13-element order the model
    /// expects. The order here is the model's input contract — do not
    /// reorder without retraining.
    pub fn to_array(&self) -> [f64; 13] {
        [
            self.line_length,
            self.indentation_level,
            self.token_diversity,
            self.is_definition,
            self.is_import,
            self.is_comment,
            self.is_closing,
            self.keyword_density,
            self.position_normalized,
            self.has_type_annotation,
            self.nesting_depth,
            self.prev_line_type,
            self.next_line_type,
        ]
    }

    /// Encodes a boolean predicate as a 1.0/0.0 model input.
    fn flag(value: bool) -> f64 {
        if value {
            1.0
        } else {
            0.0
        }
    }

    /// Ratio of unique whitespace-separated tokens to total tokens;
    /// 0.0 for a line with no tokens.
    fn compute_token_diversity(line: &str) -> f64 {
        let tokens: Vec<&str> = line.split_whitespace().collect();
        if tokens.is_empty() {
            return 0.0;
        }
        let unique: std::collections::HashSet<&str> = tokens.iter().copied().collect();
        unique.len() as f64 / tokens.len() as f64
    }

    /// True if the (trimmed) line starts a definition in one of the
    /// supported languages (Rust, Python, JS/TS).
    fn check_definition(line: &str) -> bool {
        const STARTERS: &[&str] = &[
            "fn ",
            "pub fn ",
            "async fn ",
            "pub async fn ",
            "def ",
            "async def ",
            "function ",
            "export function ",
            "async function ",
            "class ",
            "export class ",
            "struct ",
            "pub struct ",
            "enum ",
            "pub enum ",
            "trait ",
            "pub trait ",
            "impl ",
            "type ",
            "pub type ",
            "interface ",
            "export interface ",
        ];
        STARTERS.iter().any(|s| line.starts_with(s))
    }

    /// True if the (trimmed) line is an import-like statement.
    fn check_import(line: &str) -> bool {
        line.starts_with("import ")
            || line.starts_with("use ")
            || line.starts_with("from ")
            || line.starts_with("#include")
            || line.starts_with("require(")
    }

    /// True if the (trimmed) line looks like a comment.
    ///
    /// NOTE(review): `#` also matches C preprocessor lines such as
    /// `#include`, so both the import and comment flags can be set for
    /// the same line; `classify_type` resolves the ambiguity by checking
    /// imports first.
    fn check_comment(line: &str) -> bool {
        // `starts_with("//")` already covers `///` and `//!` doc comments,
        // so no separate check is needed for those.
        line.starts_with("//")
            || line.starts_with('#')
            || line.starts_with("/*")
            || line.starts_with('*')
    }

    /// True if the (trimmed) line is nothing but a closing delimiter.
    fn check_closing(line: &str) -> bool {
        matches!(line, "}" | "};" | "})" | "]" | ");" | "end")
    }

    /// Heuristic: the line appears to contain a type annotation or a
    /// typed return (`->`, `: `, `Result<`, …).
    fn check_type_annotation(line: &str) -> bool {
        line.contains("->")
            || line.contains("=>")
            || line.contains(": ")
            || line.contains("Result<")
            || line.contains("Option<")
    }

    /// Fraction of the line's tokens that are language keywords.
    ///
    /// Tokens are cleaned of surrounding punctuation on *both* sides so
    /// that e.g. `(if`, `{return` and `match(` all count as keywords
    /// (previously only trailing punctuation was stripped).
    fn compute_keyword_density(line: &str) -> f64 {
        const KEYWORDS: &[&str] = &[
            "fn",
            "let",
            "mut",
            "pub",
            "use",
            "impl",
            "struct",
            "enum",
            "match",
            "if",
            "else",
            "for",
            "while",
            "return",
            "async",
            "await",
            "trait",
            "where",
            "def",
            "class",
            "import",
            "from",
            "function",
            "export",
            "const",
            "var",
            "type",
            "interface",
            "try",
            "catch",
            "throw",
            "yield",
            "raise",
        ];
        let tokens: Vec<&str> = line.split_whitespace().collect();
        if tokens.is_empty() {
            return 0.0;
        }
        let hits = tokens
            .iter()
            .filter(|t| {
                // Strip leading AND trailing punctuation before lookup.
                let clean = t.trim_matches(|c: char| !c.is_alphanumeric());
                KEYWORDS.contains(&clean)
            })
            .count();
        hits as f64 / tokens.len() as f64
    }
}

/// Neighbourhood information for a line, computed by the caller
/// (see `score_all_lines`). `Default` yields a context with no
/// nesting and "empty" neighbour types.
#[derive(Debug, Clone, Default)]
pub struct LineContext {
    /// Brace-nesting depth at this line.
    pub nesting_depth: usize,
    /// Category code of the previous line, as produced by `classify_type`
    /// (0 = none/empty, 1 = definition, 2 = import, 3 = comment,
    /// 4 = logic, 5 = closing).
    pub prev_line_type: u8,
    /// Category code of the next line; same encoding as `prev_line_type`.
    pub next_line_type: u8,
}

impl NeuralLineScorer {
    /// Loads the ONNX model from `model_path`.
    ///
    /// # Errors
    /// Propagates any error from `Model::load_file` (missing file,
    /// malformed model, …).
    #[cfg(feature = "neural")]
    pub fn load(model_path: &Path) -> anyhow::Result<Self> {
        let model = Model::load_file(model_path)?;
        Ok(Self { model })
    }

    /// Stub used when the `neural` feature is disabled.
    ///
    /// # Errors
    /// Always fails, directing the user to rebuild with the feature on.
    #[cfg(not(feature = "neural"))]
    pub fn load(_model_path: &Path) -> anyhow::Result<Self> {
        anyhow::bail!("Neural feature not enabled. Compile with --features neural")
    }

    /// Scores a single line in isolation (default, empty context).
    /// Prefer `score_all_lines` when the whole file is available, since
    /// it supplies real nesting/neighbour context.
    pub fn score_line(&self, line: &str, position: f64, task_keywords: &[String]) -> f64 {
        let context = LineContext::default();
        let features = LineFeatures::from_line(line, position, &context);
        self.score_from_features(&features, task_keywords)
    }

    /// Scores a pre-computed feature vector, dispatching to the neural or
    /// decision-tree backend depending on the build.
    ///
    /// `_task_keywords` is currently unused by both backends; it stays in
    /// the signature so callers need no change once keyword conditioning
    /// is implemented.
    pub fn score_from_features(&self, features: &LineFeatures, _task_keywords: &[String]) -> f64 {
        #[cfg(feature = "neural")]
        {
            self.neural_score(features)
        }
        #[cfg(not(feature = "neural"))]
        {
            self.decision_tree_score(features)
        }
    }

    /// Runs the ONNX model on the 13-feature input.
    ///
    /// Any inference failure degrades to a neutral score of 0.5 rather
    /// than propagating an error, so scoring a file never aborts.
    #[cfg(feature = "neural")]
    fn neural_score(&self, features: &LineFeatures) -> f64 {
        use rten_tensor::{AsView, NdTensor};

        // Model input is a [1, 13] f32 tensor in `to_array` order.
        let input_data = features.to_array();
        let float_data: Vec<f32> = input_data.iter().map(|&x| x as f32).collect();
        let input = NdTensor::from_data([1, 13], float_data);

        match self.model.run_one(input.into(), None) {
            Ok(output) => {
                let tensor: Vec<f32> = output
                    .into_tensor::<f32>()
                    .map(|t| t.to_vec())
                    .unwrap_or_default();
                // First output element is the importance score.
                tensor.first().copied().unwrap_or(0.5) as f64
            }
            Err(_) => 0.5,
        }
    }

    /// Static decision-tree fallback (rules distilled by distill.py).
    ///
    /// Reads the named feature fields directly instead of indexing into
    /// `to_array()`, so the rules cannot silently drift out of sync with
    /// the array ordering.
    #[cfg(not(feature = "neural"))]
    fn decision_tree_score(&self, features: &LineFeatures) -> f64 {
        // Start neutral, then adjust by structural signals.
        let mut score = 0.5;

        if features.is_definition > 0.5 {
            score += 0.3;
        }
        if features.is_comment > 0.5 {
            score -= 0.2;
        }
        if features.is_closing > 0.5 {
            score -= 0.3;
        }
        if features.is_import > 0.5 {
            score -= 0.1;
        }
        if features.has_type_annotation > 0.5 {
            score += 0.15;
        }

        // LITM-style U-curve: lines near the start or end of the file keep
        // full weight; lines in the middle are attenuated up to 60%.
        let pos = features.position_normalized;
        let u_curve = if pos <= 0.5 {
            1.0 - 0.6 * (2.0 * pos).powi(2)
        } else {
            1.0 - 0.6 * (2.0 * (1.0 - pos)).powi(2)
        };
        score *= u_curve;

        score.clamp(0.0, 1.0)
    }
}

/// Scores every line of a file, threading running context (brace-nesting
/// depth and the categories of the neighbouring lines) into the feature
/// extraction. Returns one score per input line, in order.
pub fn score_all_lines(
    lines: &[&str],
    scorer: &NeuralLineScorer,
    task_keywords: &[String],
) -> Vec<f64> {
    let total = lines.len();
    let mut scores = Vec::with_capacity(total);
    let mut depth: usize = 0;

    for (idx, raw) in lines.iter().enumerate() {
        let stripped = raw.trim();
        // Track brace nesting as we go; the current line's own braces are
        // applied before it is scored.
        depth = depth
            .saturating_add(stripped.matches('{').count())
            .saturating_sub(stripped.matches('}').count());

        // Neighbour categories; 0 when there is no previous/next line.
        let prev_kind = match idx.checked_sub(1) {
            Some(p) => classify_type(lines[p].trim()),
            None => 0,
        };
        let next_kind = if idx + 1 < total {
            classify_type(lines[idx + 1].trim())
        } else {
            0
        };
        // Normalized position in 0.0..=1.0 (0.0 for a single-line input).
        let pos = idx as f64 / (total.max(1) - 1).max(1) as f64;

        let ctx = LineContext {
            nesting_depth: depth,
            prev_line_type: prev_kind,
            next_line_type: next_kind,
        };
        let feats = LineFeatures::from_line(raw, pos, &ctx);
        scores.push(scorer.score_from_features(&feats, task_keywords));
    }

    scores
}

/// Maps a trimmed line to a coarse category code used as a context feature:
/// 0 = empty, 1 = definition, 2 = import, 3 = comment, 5 = closing
/// delimiter, 4 = anything else (plain logic). Checks run in priority
/// order, so e.g. `#include` is classified as import, not comment.
fn classify_type(line: &str) -> u8 {
    if line.is_empty() {
        0
    } else if LineFeatures::check_definition(line) {
        1
    } else if LineFeatures::check_import(line) {
        2
    } else if LineFeatures::check_comment(line) {
        3
    } else if LineFeatures::check_closing(line) {
        5
    } else {
        4
    }
}