cargo_docs_md/multi_crate/search.rs
1//! Search index generation for multi-crate documentation.
2//!
3//! This module provides [`SearchIndexGenerator`] which creates a JSON search index
4//! containing all documented items across multiple crates. The index can be used
5//! with client-side search libraries like Fuse.js, Lunr.js, or `FlexSearch`.
6//!
7//! # Output Format
8//!
9//! The generated `search_index.json` contains:
10//!
11//! ```json
12//! {
13//! "items": [
14//! {
15//! "name": "Span",
16//! "path": "tracing::span::Span",
17//! "kind": "struct",
18//! "crate": "tracing",
19//! "file": "tracing/span/index.md",
20//! "summary": "A handle representing a span..."
21//! }
22//! ]
23//! }
24//! ```
25//!
26//! # Usage
27//!
28//! ```ignore
//! let generator = SearchIndexGenerator::new(&crates, false, rendered_items);
30//! generator.write(Path::new("generated_docs/"))?;
31//! ```
32
33use std::collections::{HashMap, HashSet};
34use std::path::Path;
35
36use rustdoc_types::{Crate, Id, ItemEnum, Visibility};
37use serde::Serialize;
38
39use super::{CrateCollection, RUST_PATH_SEP};
40
41/// A single searchable item in the index.
42///
43/// Contains all metadata needed for search and display in results.
44#[derive(Debug, Serialize)]
45pub struct SearchEntry {
46 /// Item name (e.g., "Span", "spawn", "Error").
47 pub name: String,
48
49 /// Full path including crate (e.g., "`tracing::span::Span`").
50 pub path: String,
51
52 /// Item kind for filtering and display.
53 ///
54 /// One of: "mod", "struct", "enum", "trait", "fn", "type", "const", "macro"
55 pub kind: &'static str,
56
57 /// Crate this item belongs to.
58 #[serde(rename = "crate")]
59 pub crate_name: String,
60
61 /// Relative file path to the markdown documentation.
62 pub file: String,
63
64 /// First line of documentation for preview in search results.
65 ///
66 /// `None` if the item has no documentation.
67 #[serde(skip_serializing_if = "Option::is_none")]
68 pub summary: Option<String>,
69}
70
71/// The complete search index containing all searchable items.
72///
73/// Serialized to `search_index.json` for client-side consumption.
74#[derive(Debug, Serialize)]
75pub struct SearchIndex {
76 /// All searchable items across all crates.
77 pub items: Vec<SearchEntry>,
78}
79
80/// Generator for multi-crate search indices.
81///
82/// Traverses all crates in a [`CrateCollection`] and builds a comprehensive
83/// search index of all public items (or all items if `include_private` is set).
84///
85/// # Example
86///
87/// ```ignore
88/// let crates = MultiCrateParser::parse_directory(Path::new("target/doc"))?;
89/// let rendered_items = generator.generate(); // Returns HashMap<String, HashSet<Id>>
90/// let generator = SearchIndexGenerator::new(&crates, false, rendered_items);
91/// generator.write(Path::new("generated_docs/"))?;
92/// ```
93pub struct SearchIndexGenerator<'a> {
94 /// Collection of crates to index.
95 crates: &'a CrateCollection,
96
97 /// Whether to include private items in the search index.
98 ///
99 /// When false (default), only public items are indexed.
100 /// When true, all items regardless of visibility are indexed.
101 include_private: bool,
102
103 /// Set of item IDs that were actually rendered per crate.
104 ///
105 /// Only items in this set will appear in the search index.
106 /// This ensures the search index matches the generated documentation.
107 rendered_items: HashMap<String, HashSet<Id>>,
108}
109
110impl<'a> SearchIndexGenerator<'a> {
111 /// Create a new search index generator.
112 ///
113 /// # Arguments
114 ///
115 /// * `crates` - Collection of parsed crates to index
116 /// * `include_private` - Whether to include non-public items
117 /// * `rendered_items` - Map of crate name to set of rendered item IDs
118 #[must_use]
119 pub const fn new(
120 crates: &'a CrateCollection,
121 include_private: bool,
122 rendered_items: HashMap<String, HashSet<Id>>,
123 ) -> Self {
124 Self {
125 crates,
126 include_private,
127 rendered_items,
128 }
129 }
130
131 /// Generate the complete search index.
132 ///
133 /// Traverses all crates and collects searchable items including:
134 /// - Modules
135 /// - Structs
136 /// - Enums
137 /// - Traits
138 /// - Functions
139 /// - Type aliases
140 /// - Constants
141 /// - Macros
142 ///
143 /// Items are sorted alphabetically by name for consistent output.
144 #[must_use]
145 pub fn generate(&self) -> SearchIndex {
146 let mut items = Vec::new();
147
148 for (crate_name, krate) in self.crates.iter() {
149 self.index_crate(&mut items, crate_name, krate);
150 }
151
152 // Sort by name for consistent, deterministic output
153 items.sort_by(|a, b| a.name.cmp(&b.name));
154
155 SearchIndex { items }
156 }
157
158 /// Write the search index to `search_index.json` in the output directory.
159 ///
160 /// # Arguments
161 ///
162 /// * `output_dir` - Directory where `search_index.json` will be written
163 ///
164 /// # Errors
165 ///
166 /// Returns an error if the file cannot be written.
167 pub fn write(&self, output_dir: &Path) -> std::io::Result<()> {
168 let index = self.generate();
169 let json = serde_json::to_string_pretty(&index)?;
170 let path = output_dir.join("search_index.json");
171 fs_err::write(path, json)?;
172 Ok(())
173 }
174
175 /// Index all items in a single crate.
176 ///
177 /// Only indexes items that were actually rendered (present in `rendered_items`).
178 fn index_crate(&self, items: &mut Vec<SearchEntry>, crate_name: &str, krate: &Crate) {
179 // Get the set of rendered items for this crate
180 let rendered_set = self.rendered_items.get(crate_name);
181
182 // Build a map of item ID to module path for accurate path construction
183 let path_map = Self::build_path_map(krate);
184
185 for (id, item) in &krate.index {
186 let Some(name) = &item.name else { continue };
187
188 // Filter by rendered items - only include items that were actually rendered
189 if let Some(rendered) = rendered_set
190 && !rendered.contains(id)
191 {
192 continue;
193 }
194
195 // Filter by visibility unless include_private is set
196 if !self.include_private && !matches!(item.visibility, Visibility::Public) {
197 continue;
198 }
199
200 // Determine item kind and whether to include
201 let kind = match &item.inner {
202 ItemEnum::Module(_) => "mod",
203 ItemEnum::Struct(_) => "struct",
204 ItemEnum::Enum(_) => "enum",
205 ItemEnum::Trait(_) => "trait",
206 ItemEnum::Function(_) => "fn",
207 ItemEnum::TypeAlias(_) => "type",
208 ItemEnum::Constant { .. } => "const",
209 ItemEnum::Macro(_) => "macro",
210 // Skip other item types (impl blocks, fields, variants, etc.)
211 _ => continue,
212 };
213
214 // Build full path (crate::module::Item)
215 let module_path = path_map.get(id).cloned().unwrap_or_default();
216 let full_path = if module_path.is_empty() {
217 format!("{crate_name}::{name}")
218 } else {
219 format!("{crate_name}::{module_path}::{name}")
220 };
221
222 // Build file path based on module location
223 let file = Self::compute_file_path(crate_name, &module_path, kind);
224
225 // Extract first line of documentation as summary
226 let summary = item
227 .docs
228 .as_ref()
229 .and_then(|d| d.lines().next())
230 .map(str::to_string);
231
232 items.push(SearchEntry {
233 name: name.clone(),
234 path: full_path,
235 kind,
236 crate_name: crate_name.to_string(),
237 file,
238 summary,
239 });
240 }
241 }
242
243 /// Build a map from item ID to its module path.
244 ///
245 /// This allows us to reconstruct the full path for each item.
246 fn build_path_map(krate: &Crate) -> HashMap<Id, String> {
247 let mut path_map = HashMap::new();
248
249 // Use the crate's paths table which maps IDs to their paths
250 for (id, item_summary) in &krate.paths {
251 // Skip external items (from other crates)
252 if item_summary.crate_id != 0 {
253 continue;
254 }
255
256 // Build path from components, excluding the item name itself
257 let path_components = &item_summary.path;
258 if path_components.len() > 1 {
259 // Path without the crate name and without the item name
260 let module_path = path_components[1..path_components.len() - 1].join("::");
261 path_map.insert(*id, module_path);
262 } else {
263 path_map.insert(*id, String::new());
264 }
265 }
266
267 path_map
268 }
269
270 /// Compute the file path for an item based on its module location.
271 fn compute_file_path(crate_name: &str, module_path: &str, kind: &str) -> String {
272 if module_path.is_empty() {
273 // Root-level item
274 format!("{crate_name}/index.md")
275 } else if kind == "mod" {
276 // Module gets its own directory
277 // Convert Rust path separators (::) to file path separators (/)
278 let path = module_path.replace(RUST_PATH_SEP, "/");
279 format!("{crate_name}/{path}/index.md")
280 } else {
281 // Item within a module - link to the module's index
282 let path = module_path.replace(RUST_PATH_SEP, "/");
283 format!("{crate_name}/{path}/index.md")
284 }
285 }
286}
287
#[cfg(test)]
mod tests {
    use super::*;

    /// A fully populated entry serializes its name, kind, and the renamed
    /// `crate` key.
    #[test]
    fn test_search_entry_serialization() {
        let span_entry = SearchEntry {
            name: String::from("Span"),
            path: String::from("tracing::span::Span"),
            kind: "struct",
            crate_name: String::from("tracing"),
            file: String::from("tracing/span/index.md"),
            summary: Some(String::from("A handle representing a span.")),
        };

        let serialized = serde_json::to_string(&span_entry).unwrap();

        let expected_fragments = [
            "\"name\":\"Span\"",
            "\"kind\":\"struct\"",
            "\"crate\":\"tracing\"",
        ];
        for fragment in expected_fragments {
            assert!(serialized.contains(fragment));
        }
    }

    /// An entry without a summary must not emit the `summary` key at all.
    #[test]
    fn test_search_entry_without_summary() {
        let undocumented = SearchEntry {
            name: String::from("foo"),
            path: String::from("crate::foo"),
            kind: "fn",
            crate_name: String::from("crate"),
            file: String::from("crate/index.md"),
            summary: None,
        };

        let serialized = serde_json::to_string(&undocumented).unwrap();

        // `skip_serializing_if` drops the field when it is `None`.
        assert!(!serialized.contains("summary"));
    }
}
324}