hlbc_indexing/
lib.rs

1use std::cmp::Ordering;
2use std::collections::BinaryHeap;
3
4use hlbc::types::RefFun;
5use hlbc::Bytecode;
6
7#[cfg(feature = "tantivy")]
8mod tantivy;
9
10pub trait Searcher {
11    fn search(&self, code: &Bytecode, needle: &str, limit: usize) -> Vec<RefFun>;
12}
13
/// Pairs a candidate with its score so it can be stored in a [`BinaryHeap`].
///
/// Every comparison looks at the score only and is *reversed*: the entry with the lowest score
/// is the greatest one. `BinaryHeap` is a max-heap, so `pop` removes the lowest-scoring entry.
struct Comp<T>(T, f32);

impl<T> PartialEq for Comp<T> {
    fn eq(&self, other: &Self) -> bool {
        // Defer to `cmp` so equality agrees with the total order (NaN and signed zeros included),
        // as the `Eq`/`Ord` contracts require.
        self.cmp(other) == Ordering::Equal
    }
}

impl<T> Eq for Comp<T> {}

impl<T> PartialOrd for Comp<T> {
    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
        Some(self.cmp(other))
    }
}

impl<T> Ord for Comp<T> {
    fn cmp(&self, other: &Self) -> Ordering {
        // Operands are swapped on purpose: a lower score sorts as greater.
        other.1.total_cmp(&self.1)
    }
}

/// Keeps the `n` best-scoring candidates from `results`.
///
/// Candidates whose score is not strictly positive (this includes NaN) are discarded.
///
/// Returns at most `n` `(candidate, score)` pairs sorted from the highest score to the lowest.
/// The relative order of candidates with equal scores is unspecified.
pub fn top_candidates<T>(n: usize, results: impl Iterator<Item = (T, f32)>) -> Vec<(T, f32)> {
    // The heap briefly holds `n + 1` entries, but never more than the iterator yields. Bounding
    // the pre-allocation by the size hint avoids an arithmetic overflow or a gigantic allocation
    // when the caller passes a very large `n` (e.g. `usize::MAX` for "no limit").
    let (lower, upper) = results.size_hint();
    let capacity = n.saturating_add(1).min(upper.unwrap_or(lower));
    let mut top = BinaryHeap::with_capacity(capacity);
    for (candidate, score) in results {
        if score > 0.0 {
            top.push(Comp(candidate, score));
            if top.len() > n {
                // Evicts the current lowest score (the heap maximum under the reversed order).
                top.pop();
            }
        }
    }
    // Ascending in the reversed order means descending by score.
    top.into_sorted_vec()
        .into_iter()
        .map(|Comp(candidate, score)| (candidate, score))
        .collect()
}
51
52pub struct Contains;
53
54impl Searcher for Contains {
55    fn search(&self, code: &Bytecode, needle: &str, limit: usize) -> Vec<RefFun> {
56        let needle_len = needle.len() as f32;
57        top_candidates(
58            limit,
59            code.functions().map(|f| {
60                let name = f.name(code);
61                let len = name.len() as f32;
62                (
63                    f.findex(),
64                    if name.contains(needle) {
65                        needle_len / len
66                    } else if needle.contains(&*name) {
67                        len / needle_len
68                    } else {
69                        0.0
70                    },
71                )
72            }),
73        )
74        .into_iter()
75        .map(|(c, s)| c)
76        .collect()
77    }
78}
79
80// pub struct Memchr;
81//
82// impl Searcher for Memchr {
83//     fn with_needle<'a>(&self, needle: &'a str) -> Box<dyn Matcher + 'a> {
84//         Box::new(MemchrMatcher(memchr::memmem::Finder::new(needle)))
85//     }
86// }
87//
88// pub struct MemchrMatcher<'a>(memchr::memmem::Finder<'a>);
89//
90// impl Matcher for MemchrMatcher<'_> {
91//     fn eval(&self, candidate: &str) -> f32 {
92//         if self.0.find(candidate.as_bytes()).is_some() {
93//             self.0.needle().len() as f32 / candidate.len() as f32
94//         } else if memchr::memmem::find(self.0.needle(), candidate.as_bytes()).is_some() {
95//             candidate.len() as f32 / self.0.needle().len() as f32
96//         } else {
97//             0.0
98//         }
99//     }
100// }
101
102pub struct ClangdSearcher(fuzzy_matcher::clangd::ClangdMatcher);
103
104impl ClangdSearcher {
105    pub fn new() -> Self {
106        Self(fuzzy_matcher::clangd::ClangdMatcher::default().ignore_case())
107    }
108}
109
110impl Searcher for ClangdSearcher {
111    fn search(&self, code: &Bytecode, needle: &str, limit: usize) -> Vec<RefFun> {
112        top_candidates(
113            limit,
114            code.functions().map(|f| {
115                (
116                    f.findex(),
117                    fuzzy_matcher::FuzzyMatcher::fuzzy_match(&self.0, &f.name(code), needle)
118                        .map(|s| s as f32)
119                        .unwrap_or(0.0),
120                )
121            }),
122        )
123        .into_iter()
124        .map(|(c, s)| c)
125        .collect()
126    }
127}
128
129pub struct SkimSearcher(fuzzy_matcher::skim::SkimMatcherV2);
130
131impl SkimSearcher {
132    pub fn new() -> Self {
133        Self(fuzzy_matcher::skim::SkimMatcherV2::default().ignore_case())
134    }
135}
136
137impl Searcher for SkimSearcher {
138    fn search(&self, code: &Bytecode, needle: &str, limit: usize) -> Vec<RefFun> {
139        top_candidates(
140            limit,
141            code.functions().map(|f| {
142                (
143                    f.findex(),
144                    fuzzy_matcher::FuzzyMatcher::fuzzy_match(&self.0, &f.name(code), needle)
145                        .map(|s| s as f32)
146                        .unwrap_or(0.0),
147                )
148            }),
149        )
150        .into_iter()
151        .map(|(c, s)| c)
152        .collect()
153    }
154}