// ripr 0.7.0
//
// Find static mutation-exposure gaps before expensive mutation testing.
// Documentation
use crate::analysis::facts::LiteralFact;

/// Returns the distinct canonical numeric literals found in `body`, sorted
/// lexicographically as strings.
///
/// Line information is discarded: the facts are gathered with a nominal
/// starting line of `1` and only their `value` strings are kept.
pub(crate) fn extract_literals(body: &str) -> Vec<String> {
    let mut values = Vec::new();
    for fact in extract_literal_facts(body, 1) {
        values.push(fact.value);
    }
    // Sorting first makes equal values adjacent so `dedup` removes every repeat.
    values.sort();
    values.dedup();
    values
}

/// Scans `body` line by line and records every numeric literal together with
/// the line it appears on.
///
/// `start_line` is the line number assigned to the first line of `body`; each
/// following line is numbered consecutively from it. The result is ordered by
/// line and then by literal text, and a value that occurs more than once on
/// the same line is reported only once for that line.
pub(crate) fn extract_literal_facts(body: &str, start_line: usize) -> Vec<LiteralFact> {
    let mut facts = Vec::new();
    for (index, text) in body.lines().enumerate() {
        let mut position = 0;
        // Walk the line one token start at a time; the loop ends when no
        // character is left at `position`.
        while let Some(ch) = text.get(position..).and_then(|rest| rest.chars().next()) {
            match numeric_literal_at(text, position) {
                Some((value, resume_at)) => {
                    facts.push(LiteralFact {
                        line: start_line + index,
                        value,
                    });
                    // Skip past the whole literal, including any type suffix.
                    position = resume_at;
                }
                // Not a literal here: step over exactly one (possibly multi-byte) char.
                None => position += ch.len_utf8(),
            }
        }
    }
    facts.sort_by(|left, right| {
        left.line
            .cmp(&right.line)
            .then_with(|| left.value.cmp(&right.value))
    });
    facts.dedup_by(|later, earlier| later.line == earlier.line && later.value == earlier.value);
    facts
}

/// Tries to read one numeric literal that starts at byte offset `cursor` of `line`.
///
/// On success returns `(value, next_cursor)`:
/// - `value` is the canonical text of the literal: optional leading `-`,
///   underscores removed, letters lowercased, type suffix excluded;
/// - `next_cursor` is the byte offset just past the literal *including* any
///   trailing suffix characters, so the caller can resume scanning there.
///
/// Returns `None` when
/// - `cursor` is not a valid offset into `line` or is at its end;
/// - the character is `-` but not in a unary-minus position;
/// - the character (after an accepted `-`) is not an ASCII digit;
/// - the character right before `cursor` is an identifier character
///   (the digits belong to a name such as `foo_42`);
/// - a `0x` / `0o` / `0b` prefix is not followed by at least one digit valid
///   for that radix.
fn numeric_literal_at(line: &str, cursor: usize) -> Option<(String, usize)> {
    // Peek the character starting at a byte offset, if there is one.
    let char_at = |index: usize| line.get(index..).and_then(|tail| tail.chars().next());

    let lead = char_at(cursor)?;
    let has_sign = lead == '-';
    if has_sign && !is_unary_minus_context(line, cursor) {
        return None;
    }

    let digits_start = if has_sign {
        cursor + lead.len_utf8()
    } else {
        cursor
    };
    let first_digit = char_at(digits_start)?;
    if !first_digit.is_ascii_digit() || is_identifier_tail_before(line, cursor) {
        return None;
    }

    // Prefixed integer: 0x.., 0o.., 0b.. (prefix letter in either case).
    if first_digit == '0' {
        let after_zero = digits_start + first_digit.len_utf8();
        if let Some((radix, prefix_len)) = radix_prefix(line, after_zero) {
            let body_start = after_zero + prefix_len;
            let body_end = consume_digits_and_underscores(line, body_start, radix);
            // A prefix followed only by underscores (or nothing) is not a literal.
            if !contains_digit_for_radix(&line[body_start..body_end], radix) {
                return None;
            }
            let resume_at = consume_literal_suffix(line, body_end);
            let value = canonical_radix_literal(&line[cursor..body_end]);
            return Some((value, resume_at));
        }
    }

    // Integer part.
    let mut end = consume_digits_and_underscores(line, digits_start, 10);

    // Fractional part: only when the dot is immediately followed by a digit,
    // so `1..5` and `1.max(2)` keep `1` as an integer.
    if char_at(end) == Some('.') && char_at(end + 1).is_some_and(|ch| ch.is_ascii_digit()) {
        end = consume_digits_and_underscores(line, end + 1, 10);
    }

    // Exponent: `e`/`E`, optional sign, then at least one decimal digit.
    // Without a digit the `e…` run is left for the suffix scan below.
    if matches!(char_at(end), Some('e' | 'E')) {
        let sign_at = end + 1;
        let exponent_digits_start = if matches!(char_at(sign_at), Some('+' | '-')) {
            sign_at + 1
        } else {
            sign_at
        };
        let exponent_end = consume_digits_and_underscores(line, exponent_digits_start, 10);
        if contains_digit_for_radix(&line[exponent_digits_start..exponent_end], 10) {
            end = exponent_end;
        }
    }

    let resume_at = consume_literal_suffix(line, end);
    let value = canonical_decimal_literal(&line[cursor..end]);
    Some((value, resume_at))
}

/// Returns the byte offset just past the run of `_` and base-`radix` digit
/// characters that starts at `cursor`.
///
/// If `cursor` is not a valid offset into `line`, or the run is empty,
/// `cursor` is returned unchanged.
fn consume_digits_and_underscores(line: &str, cursor: usize, radix: u32) -> usize {
    match line.get(cursor..) {
        None => cursor,
        Some(tail) => {
            // Length of the run = position of the first character that ends it,
            // or the whole tail when nothing ends it.
            let run_len = tail
                .find(|ch: char| ch != '_' && !ch.is_digit(radix))
                .unwrap_or(tail.len());
            cursor + run_len
        }
    }
}

/// Returns the byte offset just past the run of `_` and ASCII alphanumeric
/// characters that starts at `cursor` (for example a type suffix such as
/// `usize` or `_f32`).
///
/// If `cursor` is not a valid offset into `line`, or the run is empty,
/// `cursor` is returned unchanged.
fn consume_literal_suffix(line: &str, cursor: usize) -> usize {
    line.get(cursor..).map_or(cursor, |tail| {
        let suffix_len: usize = tail
            .chars()
            .take_while(|ch| *ch == '_' || ch.is_ascii_alphanumeric())
            .map(char::len_utf8)
            .sum();
        cursor + suffix_len
    })
}

/// Reports whether `value` holds at least one character that is a digit in
/// base `radix` (so a run made only of underscores, or an empty run, is rejected).
fn contains_digit_for_radix(value: &str, radix: u32) -> bool {
    value.contains(|ch: char| ch.is_digit(radix))
}

/// Recognises the radix letter of a prefixed integer literal at byte offset `cursor`.
///
/// Returns `(radix, prefix_len)` with `prefix_len` always `1`:
/// `x`/`X` → 16, `o`/`O` → 8, `b`/`B` → 2. Returns `None` for any other
/// character, at end of line, or when `cursor` is not a valid offset.
fn radix_prefix(line: &str, cursor: usize) -> Option<(u32, usize)> {
    let radix = match line.get(cursor..)?.chars().next()? {
        'x' | 'X' => 16,
        'o' | 'O' => 8,
        'b' | 'B' => 2,
        _ => return None,
    };
    Some((radix, 1))
}

/// Canonicalises the raw text of a decimal literal: every `_` is dropped and
/// each remaining character is lowercased (so `4E-2` becomes `4e-2`).
fn canonical_decimal_literal(raw: &str) -> String {
    let mut canonical = String::with_capacity(raw.len());
    for ch in raw.chars() {
        if ch == '_' {
            continue;
        }
        // `to_lowercase` may yield more than one char, hence `extend`.
        canonical.extend(ch.to_lowercase());
    }
    canonical
}

/// Canonicalises the raw text of a prefixed (hex / octal / binary) literal:
/// every `_` is dropped and each remaining character is lowercased
/// (so `0Xff_` becomes `0xff`).
fn canonical_radix_literal(raw: &str) -> String {
    // Splitting on `_` and concatenating the pieces removes all underscores.
    raw.split('_')
        .flat_map(|piece| piece.chars())
        .flat_map(|ch| ch.to_lowercase())
        .collect()
}

/// Reports whether a `-` at byte offset `cursor` stands where an operand is
/// expected, i.e. whether it can be the sign of a negative literal rather
/// than a binary subtraction.
///
/// Only the last non-whitespace character before `cursor` is inspected:
/// - nothing before it (start of line, or only whitespace) → unary;
/// - an opening bracket, a separator (`,` `:` `;`) or an operator character → unary;
/// - anything else (identifier or literal character, closing bracket, …) → binary.
///
/// The operator set includes bitwise XOR (`^`) alongside `&` and `|`, so the
/// sign in `mask ^ -1` is kept.
///
/// This is a single-character heuristic: a keyword such as `return` ends in an
/// identifier character and is therefore classified as a binary context.
///
/// # Panics
///
/// Panics if `cursor` is past the end of `line` or not on a char boundary.
fn is_unary_minus_context(line: &str, cursor: usize) -> bool {
    match line[..cursor].trim_end().chars().next_back() {
        // Nothing but whitespace precedes the minus.
        None => true,
        Some(previous) => matches!(
            previous,
            '(' | '['
                | '{'
                | ','
                | '='
                | '!'
                | '<'
                | '>'
                | '&'
                | '|'
                | '^'
                | '+'
                | '-'
                | '*'
                | '/'
                | '%'
                | ':'
                | ';'
        ),
    }
}

/// Reports whether the character immediately before byte offset `cursor` is an
/// identifier character (`_` or ASCII alphanumeric), which means digits at
/// `cursor` continue a name or an earlier token instead of starting a literal.
///
/// Returns `false` when `cursor` is `0`.
///
/// # Panics
///
/// Panics if `cursor` is past the end of `line` or not on a char boundary.
fn is_identifier_tail_before(line: &str, cursor: usize) -> bool {
    line[..cursor].ends_with(|previous: char| previous == '_' || previous.is_ascii_alphanumeric())
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Values come back sorted as strings with repeats across lines removed.
    #[test]
    fn extract_literals_sorts_and_deduplicates_values() {
        let found = extract_literals("let a = 42;\nlet b = -7;\nlet c = 42;");

        assert_eq!(found, ["-7", "42"]);
    }

    /// A lone `-` yields nothing; line numbers are offset by the start line.
    #[test]
    fn extract_literal_facts_preserves_source_lines_and_ignores_bare_minus() {
        let found = extract_literal_facts("let a = -;\nlet b = -12;\nlet c = 3 + 3;", 40);

        let lines_and_values: Vec<_> = found
            .iter()
            .map(|fact| (fact.line, fact.value.as_str()))
            .collect();

        assert_eq!(lines_and_values, [(41, "-12"), (42, "3")]);
    }

    /// The same value is collapsed within a line but kept once per line.
    #[test]
    fn extract_literal_facts_deduplicates_same_value_on_same_line_only() {
        let found = extract_literal_facts("let a = 9 + 9;\nlet b = 9;", 1);

        assert_eq!(found.len(), 2);
        assert_eq!(found[0].line, 1);
        assert_eq!(found[0].value, "9");
        assert_eq!(found[1].line, 2);
        assert_eq!(found[1].value, "9");
    }

    /// A radix prefix followed only by underscores / letters is not a literal.
    #[test]
    fn extract_literals_ignores_malformed_radix_prefix_without_digits() {
        let found = extract_literals("let bad = 0b_cnt + 0x_ + 0o_mode;");

        assert_eq!(found, Vec::<String>::new());
    }

    /// Upper-case prefixes, underscores and type suffixes all normalise away.
    #[test]
    fn extract_literals_handles_radix_prefixes_suffixes_and_case() {
        let found =
            extract_literals("let flags = 0B1010usize + 0O77_u8 + 0Xff_i32 + 0b_1010_u16;");

        assert_eq!(found, ["0b1010", "0o77", "0xff"]);
    }

    /// Fractions and signed exponents are kept; an `e` without digits is a suffix.
    #[test]
    fn extract_literals_handles_decimal_float_exponents_and_suffixes() {
        let found = extract_literals("let ratio = 1_000.50f64 + 2e+3_f32 + 4E-2 + 9e_name;");

        assert_eq!(found, ["1000.50", "2e+3", "4e-2", "9"]);
    }

    /// Digits inside identifiers are skipped; only unary minuses keep their sign.
    #[test]
    fn extract_literals_respects_identifier_and_unary_boundaries() {
        let found =
            extract_literals("let value9 = total - discount + foo_42 + (-7) + [ -8, amount*-9 ];");

        assert_eq!(found, ["-7", "-8", "-9"]);
    }

    /// Digits that follow a rejected radix prefix must not surface on their own.
    #[test]
    fn extract_literals_ignores_invalid_radix_without_later_digit_leakage() {
        let found = extract_literals("let bad = 0b_2 + 0x_g1 + 0o_8;");

        assert_eq!(found, Vec::<String>::new());
    }
}