rustdoc_seeker/
seeker.rs

1use fst::{Automaton, IntoStreamer, Map, MapBuilder};
2use itertools::Itertools;
3use std::{
4    cmp::{Ord, Ordering},
5    collections::BTreeSet,
6    fmt,
7    iter::FromIterator,
8    u32,
9};
10use string_cache::DefaultAtom as Atom;
11
12macro_rules! enum_number {
13    ($name:ident { $($variant:ident | $display:tt | $value:tt, )* }) => {
14        /// TypeItem represent an item with type,
15        /// Use `Display` or `fmt_url` to get the `type dot name` format of the item.
16        ///
17        /// # Example
18        ///
19        /// ```
20        /// assert_eq!("module.vec", TypeItme::Module("vec"));
21        /// assert_eq!("macro.vec", TypeItme::Macro("vec"));
22        ///
23        /// assert_eq!("fn.vec", TypeItme::Function("vec")); // the only two exceptions
24        /// assert_eq!("type.vec", TypeItme::Typedef("vec")); // the only two exceptions
25        /// ```
26        #[derive(Clone, Debug, Eq, PartialEq)]
27        pub enum $name {
28            $($variant(Atom),)*
29        }
30
31        impl $name {
32            pub fn new(tag: usize, data: Atom) -> $name {
33                match tag {
34                    $( $value => $name::$variant(data), )*
35                    _ => unreachable!()
36                }
37            }
38        }
39
40        impl AsRef<Atom> for $name {
41            fn as_ref(&self) -> &Atom {
42                match self {
43                    $( $name::$variant(data) => data, )*
44                }
45            }
46        }
47
48        impl fmt::Display for $name {
49            fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
50                match self {
51                    $( $name::$variant(data) => write!(f, "{}.{}", $display, data), )*
52                }
53            }
54        }
55    }
56}
57
58enum_number!(TypeItem {
59    Module          | "module"          | 0,
60    ExternCrate     | "externcrate"     | 1,
61    Import          | "import"          | 2,
62    Struct          | "struct"          | 3,
63    Enum            | "enum"            | 4,
64    Function        | "fn"              | 5,
65    Typedef         | "type"            | 6,
66    Static          | "static"          | 7,
67    Trait           | "trait"           | 8,
68    Impl            | "impl"            | 9,
69    TyMethod        | "tymethod"        | 10,
70    Method          | "method"          | 11,
71    StructField     | "structfield"     | 12,
72    Variant         | "variant"         | 13,
73    Macro           | "macro"           | 14,
74    Primitive       | "primitive"       | 15,
75    AssociatedType  | "associatedtype"  | 16,
76    Constant        | "constant"        | 17,
77    AssociatedConst | "associatedconst" | 18,
78    Union           | "union"           | 19,
79    ForeignType     | "foreigntype"     | 20,
80    Keyword         | "keyword"         | 21,
81    Existential     | "existential"     | 22,
82});
83
84/// DocItem represent a searchable item,
85/// Use `Display` to get the relative URI of the item.
86///
87/// eg:
88///
89/// The `dedup` (name) function of the `Vec`(parent) struct in `std::vec`(path) module.
90///
91/// The `Vec`(name) struct of `None`(parent) in `std::vec`(path) module.
92///
93/// The `vec`(name) module of `None`(parent) in `std`(path) module.
94///
95/// # Example
96///
97/// ```
98/// println!("{} is the url of {:?}", &docitem, &docitem)
99/// ```
100#[derive(Debug, Eq)]
101pub struct DocItem {
102    pub name: TypeItem,
103    pub parent: Option<TypeItem>,
104    pub path: Atom,
105    pub desc: Atom,
106}
107
108impl DocItem {
109    pub fn new(name: TypeItem, parent: Option<TypeItem>, path: Atom, desc: Atom) -> DocItem {
110        DocItem {
111            name,
112            parent,
113            path,
114            desc,
115        }
116    }
117
118    pub fn fmt_naive<W: fmt::Write>(&self, f: &mut W) -> fmt::Result {
119        write!(f, "{}::", self.path)?;
120        if let Some(ref parent) = self.parent {
121            write!(f, "{}::", parent)?;
122        };
123        write!(f, "{}", self.name)
124    }
125
126    pub fn fmt_url<W: fmt::Write>(&self, f: &mut W) -> fmt::Result {
127        for part in self.path.split("::") {
128            write!(f, "{}/", part)?;
129        }
130        if let Some(ref parent) = self.parent {
131            write!(f, "{}.html#{}", parent, self.name)
132        } else {
133            match &self.name {
134                TypeItem::Module(name) => write!(f, "{}/index.html", name),
135                _ => write!(f, "{}.html", self.name),
136            }
137        }
138    }
139
140    fn parent_atom(&self) -> Option<&Atom> {
141        self.parent.as_ref().map(|p| p.as_ref())
142    }
143
144    fn index_key(&self) -> &[u8] {
145        self.name.as_ref().as_bytes()
146    }
147}
148
149impl PartialEq for DocItem {
150    fn eq(&self, other: &DocItem) -> bool {
151        self.name == other.name && self.parent == other.parent && self.path == other.path
152    }
153}
154
155impl Ord for DocItem {
156    fn cmp(&self, other: &Self) -> Ordering {
157        self.index_key()
158            .cmp(&other.index_key())
159            .then_with(|| self.path.cmp(&other.path))
160            .then_with(|| self.parent_atom().cmp(&other.parent_atom()))
161    }
162}
163
164impl PartialOrd for DocItem {
165    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
166        Some(self.cmp(other))
167    }
168}
169
170impl fmt::Display for DocItem {
171    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
172        self.fmt_url(f)
173    }
174}
175
176/// RustDoc contains DocItems, which could be convert to RustDocSeeker.
177///
178/// # Example
179///
180/// ```
181/// let data = fs::read_to_string("search-index.js").unwrap();
182/// let rustdoc: RustDoc = data.parse().unwrap();
183///
184/// // let's combine RustDoc
185/// rustdoc_a.extend(rustdoc_b)
186/// ```
187#[derive(Debug)]
188pub struct RustDoc {
189    items: BTreeSet<DocItem>,
190}
191
192impl Extend<DocItem> for RustDoc {
193    fn extend<T: IntoIterator<Item=DocItem>>(&mut self, iter: T) {
194        self.items.extend(iter);
195    }
196}
197
198impl FromIterator<DocItem> for RustDoc {
199    fn from_iter<I: IntoIterator<Item=DocItem>>(iter: I) -> Self {
200        RustDoc {
201            items: iter.into_iter().collect(),
202        }
203    }
204}
205
206impl IntoIterator for RustDoc {
207    type IntoIter = <BTreeSet<DocItem> as IntoIterator>::IntoIter;
208    type Item = DocItem;
209
210    fn into_iter(self) -> Self::IntoIter {
211        self.items.into_iter()
212    }
213}
214
215impl RustDoc {
216    pub fn new(items: BTreeSet<DocItem>) -> RustDoc {
217        RustDoc {
218            items,
219        }
220    }
221
222    pub fn iter(&self) -> impl Iterator<Item=&DocItem> {
223        self.items.iter()
224    }
225
226    /// Build an index for searching
227    pub fn build(self) -> RustDocSeeker {
228        let mut builder = MapBuilder::memory();
229        let items = self.items.into_iter().collect_vec().into_boxed_slice();
230
231        assert!(items.len() as u64 <= u32::MAX as u64);
232
233        {
234            let groups = items
235                .iter()
236                .enumerate()
237                .group_by(|(_, item)| item.index_key());
238
239            for (key, mut group) in groups.into_iter() {
240                let (start, _) = group.next().unwrap();
241                let end = group.last().map_or(start, |(i, _)| i) + 1;
242                let val = ((start as u64) << 32) + end as u64;
243                // We already sort and dedup using BTreeSet, so it always safe to unwrap.
244                builder.insert(key, val).unwrap();
245            }
246        }
247
248        let index = builder.into_map();
249        RustDocSeeker {
250            items,
251            index,
252        }
253    }
254}
255
256/// RustDocSeeker contains DocItems and Index for fast searching.
257///
258/// The index is kv-map for <name, idx: u64 = (start: u32 << 32) + end: u32>
259/// where items[start..end] having the same DocItem.name.
260///
261/// # Example
262///
263/// ```
264/// let seeker = rustdoc.build();
265/// ```
266#[derive(Debug)]
267pub struct RustDocSeeker {
268    items: Box<[DocItem]>,
269    index: Map<Vec<u8>>,
270}
271
272impl RustDocSeeker {
273    /// Search with fst::Automaton, read fst::automaton / fst-levenshtein / fst-regex for details.
274    ///
275    /// # Example
276    ///
277    /// ```
278    /// let aut = fst_regex::Regex::new(".*dedup.*").unwrap();
279    /// for i in seeker.search(aut) {
280    ///     println!("{:?}", i);
281    /// }
282    ///
283    /// let aut = fst_levenshtein::Levenshtein::new("dedXp", 1).unwrap();
284    /// for i in seeker.search(aut) {
285    ///     println!("{:?}", i);
286    /// }
287    ///
288    /// let aut = fst::automaton::Subsequence::new("dedup");
289    /// for i in seeker.search(aut) {
290    ///     println!("{:?}", i);
291    /// }
292    /// ```
293    pub fn search<A: Automaton>(&self, aut: &A) -> impl Iterator<Item=&DocItem> {
294        let result = self.index.search(aut).into_stream().into_values();
295
296        result.into_iter().flat_map(move |idx| {
297            let start = (idx >> 32) as usize;
298            let end = (idx & 0xffffffff) as usize;
299            &self.items[start..end]
300        })
301    }
302}