cargo_docs_md/multi_crate/
search.rs

1//! Search index generation for multi-crate documentation.
2//!
3//! This module provides [`SearchIndexGenerator`] which creates a JSON search index
4//! containing all documented items across multiple crates. The index can be used
5//! with client-side search libraries like Fuse.js, Lunr.js, or `FlexSearch`.
6//!
7//! # Output Format
8//!
9//! The generated `search_index.json` contains:
10//!
11//! ```json
12//! {
13//!   "items": [
14//!     {
15//!       "name": "Span",
16//!       "path": "tracing::span::Span",
17//!       "kind": "struct",
18//!       "crate": "tracing",
19//!       "file": "tracing/span/index.md",
20//!       "summary": "A handle representing a span..."
21//!     }
22//!   ]
23//! }
24//! ```
25//!
26//! # Usage
27//!
28//! ```ignore
29//! let generator = SearchIndexGenerator::new(&crates, false, rendered_items);
30//! generator.write(Path::new("generated_docs/"))?;
31//! ```
32
33use std::collections::{HashMap, HashSet};
34use std::path::Path;
35
36use rustdoc_types::{Crate, Id, ItemEnum, Visibility};
37use serde::Serialize;
38
39use super::{CrateCollection, RUST_PATH_SEP};
40
41/// A single searchable item in the index.
42///
43/// Contains all metadata needed for search and display in results.
44#[derive(Debug, Serialize)]
45pub struct SearchEntry {
46    /// Item name (e.g., "Span", "spawn", "Error").
47    pub name: String,
48
49    /// Full path including crate (e.g., "`tracing::span::Span`").
50    pub path: String,
51
52    /// Item kind for filtering and display.
53    ///
54    /// One of: "mod", "struct", "enum", "trait", "fn", "type", "const", "macro"
55    pub kind: &'static str,
56
57    /// Crate this item belongs to.
58    #[serde(rename = "crate")]
59    pub crate_name: String,
60
61    /// Relative file path to the markdown documentation.
62    pub file: String,
63
64    /// First line of documentation for preview in search results.
65    ///
66    /// `None` if the item has no documentation.
67    #[serde(skip_serializing_if = "Option::is_none")]
68    pub summary: Option<String>,
69}
70
71/// The complete search index containing all searchable items.
72///
73/// Serialized to `search_index.json` for client-side consumption.
74#[derive(Debug, Serialize)]
75pub struct SearchIndex {
76    /// All searchable items across all crates.
77    pub items: Vec<SearchEntry>,
78}
79
80/// Generator for multi-crate search indices.
81///
82/// Traverses all crates in a [`CrateCollection`] and builds a comprehensive
83/// search index of all public items (or all items if `include_private` is set).
84///
85/// # Example
86///
87/// ```ignore
88/// let crates = MultiCrateParser::parse_directory(Path::new("target/doc"))?;
89/// let rendered_items = generator.generate();  // Returns HashMap<String, HashSet<Id>>
90/// let generator = SearchIndexGenerator::new(&crates, false, rendered_items);
91/// generator.write(Path::new("generated_docs/"))?;
92/// ```
93pub struct SearchIndexGenerator<'a> {
94    /// Collection of crates to index.
95    crates: &'a CrateCollection,
96
97    /// Whether to include private items in the search index.
98    ///
99    /// When false (default), only public items are indexed.
100    /// When true, all items regardless of visibility are indexed.
101    include_private: bool,
102
103    /// Set of item IDs that were actually rendered per crate.
104    ///
105    /// Only items in this set will appear in the search index.
106    /// This ensures the search index matches the generated documentation.
107    rendered_items: HashMap<String, HashSet<Id>>,
108}
109
110impl<'a> SearchIndexGenerator<'a> {
111    /// Create a new search index generator.
112    ///
113    /// # Arguments
114    ///
115    /// * `crates` - Collection of parsed crates to index
116    /// * `include_private` - Whether to include non-public items
117    /// * `rendered_items` - Map of crate name to set of rendered item IDs
118    #[must_use]
119    pub const fn new(
120        crates: &'a CrateCollection,
121        include_private: bool,
122        rendered_items: HashMap<String, HashSet<Id>>,
123    ) -> Self {
124        Self {
125            crates,
126            include_private,
127            rendered_items,
128        }
129    }
130
131    /// Generate the complete search index.
132    ///
133    /// Traverses all crates and collects searchable items including:
134    /// - Modules
135    /// - Structs
136    /// - Enums
137    /// - Traits
138    /// - Functions
139    /// - Type aliases
140    /// - Constants
141    /// - Macros
142    ///
143    /// Items are sorted alphabetically by name for consistent output.
144    #[must_use]
145    pub fn generate(&self) -> SearchIndex {
146        let mut items = Vec::new();
147
148        for (crate_name, krate) in self.crates.iter() {
149            self.index_crate(&mut items, crate_name, krate);
150        }
151
152        // Sort by name for consistent, deterministic output
153        items.sort_by(|a, b| a.name.cmp(&b.name));
154
155        SearchIndex { items }
156    }
157
158    /// Write the search index to `search_index.json` in the output directory.
159    ///
160    /// # Arguments
161    ///
162    /// * `output_dir` - Directory where `search_index.json` will be written
163    ///
164    /// # Errors
165    ///
166    /// Returns an error if the file cannot be written.
167    pub fn write(&self, output_dir: &Path) -> std::io::Result<()> {
168        let index = self.generate();
169        let json = serde_json::to_string_pretty(&index)?;
170        let path = output_dir.join("search_index.json");
171        fs_err::write(path, json)?;
172        Ok(())
173    }
174
175    /// Index all items in a single crate.
176    ///
177    /// Only indexes items that were actually rendered (present in `rendered_items`).
178    fn index_crate(&self, items: &mut Vec<SearchEntry>, crate_name: &str, krate: &Crate) {
179        // Get the set of rendered items for this crate
180        let rendered_set = self.rendered_items.get(crate_name);
181
182        // Build a map of item ID to module path for accurate path construction
183        let path_map = Self::build_path_map(krate);
184
185        for (id, item) in &krate.index {
186            let Some(name) = &item.name else { continue };
187
188            // Filter by rendered items - only include items that were actually rendered
189            if let Some(rendered) = rendered_set
190                && !rendered.contains(id)
191            {
192                continue;
193            }
194
195            // Filter by visibility unless include_private is set
196            if !self.include_private && !matches!(item.visibility, Visibility::Public) {
197                continue;
198            }
199
200            // Determine item kind and whether to include
201            let kind = match &item.inner {
202                ItemEnum::Module(_) => "mod",
203                ItemEnum::Struct(_) => "struct",
204                ItemEnum::Enum(_) => "enum",
205                ItemEnum::Trait(_) => "trait",
206                ItemEnum::Function(_) => "fn",
207                ItemEnum::TypeAlias(_) => "type",
208                ItemEnum::Constant { .. } => "const",
209                ItemEnum::Macro(_) => "macro",
210                // Skip other item types (impl blocks, fields, variants, etc.)
211                _ => continue,
212            };
213
214            // Build full path (crate::module::Item)
215            let module_path = path_map.get(id).cloned().unwrap_or_default();
216            let full_path = if module_path.is_empty() {
217                format!("{crate_name}::{name}")
218            } else {
219                format!("{crate_name}::{module_path}::{name}")
220            };
221
222            // Build file path based on module location
223            let file = Self::compute_file_path(crate_name, &module_path, kind);
224
225            // Extract first line of documentation as summary
226            let summary = item
227                .docs
228                .as_ref()
229                .and_then(|d| d.lines().next())
230                .map(str::to_string);
231
232            items.push(SearchEntry {
233                name: name.clone(),
234                path: full_path,
235                kind,
236                crate_name: crate_name.to_string(),
237                file,
238                summary,
239            });
240        }
241    }
242
243    /// Build a map from item ID to its module path.
244    ///
245    /// This allows us to reconstruct the full path for each item.
246    fn build_path_map(krate: &Crate) -> HashMap<Id, String> {
247        let mut path_map = HashMap::new();
248
249        // Use the crate's paths table which maps IDs to their paths
250        for (id, item_summary) in &krate.paths {
251            // Skip external items (from other crates)
252            if item_summary.crate_id != 0 {
253                continue;
254            }
255
256            // Build path from components, excluding the item name itself
257            let path_components = &item_summary.path;
258            if path_components.len() > 1 {
259                // Path without the crate name and without the item name
260                let module_path = path_components[1..path_components.len() - 1].join("::");
261                path_map.insert(*id, module_path);
262            } else {
263                path_map.insert(*id, String::new());
264            }
265        }
266
267        path_map
268    }
269
270    /// Compute the file path for an item based on its module location.
271    fn compute_file_path(crate_name: &str, module_path: &str, kind: &str) -> String {
272        if module_path.is_empty() {
273            // Root-level item
274            format!("{crate_name}/index.md")
275        } else if kind == "mod" {
276            // Module gets its own directory
277            // Convert Rust path separators (::) to file path separators (/)
278            let path = module_path.replace(RUST_PATH_SEP, "/");
279            format!("{crate_name}/{path}/index.md")
280        } else {
281            // Item within a module - link to the module's index
282            let path = module_path.replace(RUST_PATH_SEP, "/");
283            format!("{crate_name}/{path}/index.md")
284        }
285    }
286}
287
#[cfg(test)]
mod tests {
    use super::*;

    /// Serialize an entry to compact JSON, panicking if serialization fails.
    fn to_json(entry: &SearchEntry) -> String {
        serde_json::to_string(entry).unwrap()
    }

    /// A fully populated entry serializes its fields under the expected keys,
    /// including the renamed `crate` key.
    #[test]
    fn test_search_entry_serialization() {
        let json = to_json(&SearchEntry {
            name: String::from("Span"),
            path: String::from("tracing::span::Span"),
            kind: "struct",
            crate_name: String::from("tracing"),
            file: String::from("tracing/span/index.md"),
            summary: Some(String::from("A handle representing a span.")),
        });

        let expected_fragments = [
            r#""name":"Span""#,
            r#""kind":"struct""#,
            r#""crate":"tracing""#,
        ];
        for fragment in expected_fragments {
            assert!(json.contains(fragment));
        }
    }

    /// An entry without a summary must not emit a `summary` key at all.
    #[test]
    fn test_search_entry_without_summary() {
        let json = to_json(&SearchEntry {
            name: String::from("foo"),
            path: String::from("crate::foo"),
            kind: "fn",
            crate_name: String::from("crate"),
            file: String::from("crate/index.md"),
            summary: None,
        });

        assert!(!json.contains("summary"));
    }
}