source-map-php 0.1.3

CLI-first PHP code search indexer for Laravel and Hyperf repositories
Documentation
use std::collections::BTreeSet;

const STOP_WORDS: &[&str] = &[
    "the", "a", "an", "is", "are", "where", "what", "how", "for", "to", "before", "after", "and",
    "or", "in", "of", "with", "on",
];

pub fn compact_query(input: &str) -> String {
    let mut seen = BTreeSet::new();
    let mut result = Vec::new();

    for token in input
        .split(|c: char| !c.is_ascii_alphanumeric() && c != '_' && c != '\\')
        .filter(|token| !token.is_empty())
    {
        let lower = token.to_ascii_lowercase();
        if STOP_WORDS.contains(&lower.as_str()) {
            continue;
        }
        if seen.insert(lower) {
            result.push(token.to_string());
        }
        if result.len() >= 10 {
            break;
        }
    }

    result.join(" ")
}

#[cfg(test)]
mod tests {
    use super::compact_query;

    #[test]
    fn strips_noise_and_limits_words() {
        let query = compact_query(
            "where is consent checked before discharge export in the patient service and controller",
        );
        assert_eq!(
            query,
            "consent checked discharge export patient service controller"
        );
    }
}