Skip to main content

source_map_php/
query.rs

1use std::collections::BTreeSet;
2
/// Filler words that `compact_query` removes from a query.
///
/// Lookups are done with the ASCII-lowercased form of each token, so every
/// entry here must be written in lowercase or it will never match.
const STOP_WORDS: &[&str] = &[
    "the", "a", "an", "is", "are", "where", "what", "how", "for", "to", "before", "after", "and",
    "or", "in", "of", "with", "on",
];
7
8pub fn compact_query(input: &str) -> String {
9    let mut seen = BTreeSet::new();
10    let mut result = Vec::new();
11
12    for token in input
13        .split(|c: char| !c.is_ascii_alphanumeric() && c != '_' && c != '\\')
14        .filter(|token| !token.is_empty())
15    {
16        let lower = token.to_ascii_lowercase();
17        if STOP_WORDS.contains(&lower.as_str()) {
18            continue;
19        }
20        if seen.insert(lower) {
21            result.push(token.to_string());
22        }
23        if result.len() >= 10 {
24            break;
25        }
26    }
27
28    result.join(" ")
29}
30
#[cfg(test)]
mod tests {
    use super::compact_query;

    /// Stop words are removed and the remaining terms keep their input order.
    #[test]
    fn strips_stop_words() {
        let query = compact_query(
            "where is consent checked before discharge export in the patient service and controller",
        );
        assert_eq!(
            query,
            "consent checked discharge export patient service controller"
        );
    }

    /// Only the first ten surviving terms are returned.
    #[test]
    fn limits_output_to_ten_terms() {
        let query = compact_query("t1 t2 t3 t4 t5 t6 t7 t8 t9 t10 t11 t12");
        assert_eq!(query, "t1 t2 t3 t4 t5 t6 t7 t8 t9 t10");
    }

    /// Repeats are detected case-insensitively and the first spelling is kept.
    #[test]
    fn deduplicates_ignoring_ascii_case() {
        let query = compact_query("Patient patient PATIENT Service");
        assert_eq!(query, "Patient Service");
    }

    /// Backslashes and underscores stay inside a term; other punctuation splits.
    #[test]
    fn keeps_backslashes_and_underscores_inside_terms() {
        let query = compact_query("App\\Service\\Patient_Export::run()");
        assert_eq!(query, "App\\Service\\Patient_Export run");
    }

    /// Input with nothing but stop words, or nothing at all, yields an empty query.
    #[test]
    fn returns_empty_string_when_nothing_survives() {
        assert_eq!(compact_query(""), "");
        assert_eq!(compact_query("the and of"), "");
    }
}