cargo_docs_md/parser.rs
1//! Rustdoc JSON parsing module.
2//!
3//! This module handles loading and parsing rustdoc JSON files into the
4//! `rustdoc_types::Crate` structure that represents the entire documented crate.
5//!
6//! # Rustdoc JSON Format
7//!
8//! Rustdoc JSON is generated by running:
9//! ```bash
10//! cargo doc --output-format json
11//! ```
12//!
13//! The output is a single JSON file at `target/doc/{crate_name}.json` containing:
14//! - The crate's module hierarchy
15//! - All public (and optionally private) items
16//! - Documentation strings
17//! - Type information and generics
18//! - Cross-reference links between items
19//!
20//! # Key Types
21//!
22//! The parsed `Crate` contains:
23//! - `root`: ID of the root module
24//! - `index`: `HashMap` of all items by their ID
25//! - `paths`: `HashMap` mapping IDs to their full module paths
26//! - `crate_version`: Optional version string
27//!
28//! # Performance
29//!
30//! When the `simd-json` feature is enabled, parsing uses SIMD-accelerated
31//! JSON parsing which is significantly faster for large rustdoc JSON files
32//! (10-50MB+). This requires AVX2/SSE4.2 on x86 platforms.
33
34use fs_err as FileSystemError;
35use rustdoc_types::Crate;
36use tracing::instrument;
37
38use crate::error::Error;
39
/// Parser for rustdoc JSON files.
///
/// A stateless unit type: all functionality is exposed through associated
/// functions that load rustdoc JSON from a file or a string and deserialize
/// it into the `rustdoc_types::Crate` structure.
#[derive(Debug, Clone, Copy, Default)]
pub struct Parser;
45
46impl Parser {
47 /// Parse a rustdoc JSON file from disk into a `Crate` structure.
48 ///
49 /// This is the primary entry point for loading documentation data.
50 /// The file should be generated with `cargo doc --output-format json`.
51 /// Parse a JSON string into a Crate structure.
52 ///
53 /// # Errors
54 /// Returns an error if it faills to parse the JSON.
55 #[instrument(skip(json), fields(json_len = json.len()))]
56 pub fn parse_json(json: &str) -> Result<Crate, Error> {
57 tracing::info!("Starting JSON parsing");
58
59 #[cfg(feature = "simd-json")]
60 let result = {
61 tracing::debug!("Using simd-json parser");
62
63 let mut json_bytes = json.as_bytes().to_vec();
64 simd_json::from_slice::<Crate>(&mut json_bytes).map_err(Error::SimdJsonParse)
65 };
66
67 #[cfg(not(feature = "simd-json"))]
68 let result: Result<Crate, Error> = {
69 tracing::debug!("Using serde_json parser");
70
71 serde_json::from_str(json).map_err(Error::JsonParse)
72 };
73
74 match &result {
75 Ok(krate) => {
76 tracing::info!(
77 crate_name = ?krate.index.get(&krate.root).and_then(|i| i.name.as_ref()),
78 item_count = krate.index.len(),
79 "Successfully parsed crate"
80 );
81 },
82
83 Err(e) => {
84 tracing::warn!(error = %e, "Failed to parse JSON");
85 },
86 }
87
88 result
89 }
90
91 /// Parse a JSON file.
92 ///
93 /// # Errors
94 /// Returns error if fails to read the JSON file.
95 #[instrument(skip_all, fields(path = %path.as_ref().display()))]
96 pub fn parse_file(path: impl AsRef<std::path::Path>) -> Result<Crate, Error> {
97 let path = path.as_ref();
98 tracing::debug!("Reading file");
99
100 let json = FileSystemError::read_to_string(path).map_err(Error::FileRead)?;
101 tracing::debug!(bytes = json.len(), "File read successfully");
102
103 Self::parse_json(&json)
104 }
105
106 /// Parse a rustdoc JSON string into a `Crate` structure.
107 ///
108 /// This function is useful when the JSON content is already in memory
109 /// (e.g., fetched from a URL or embedded in tests).
110 ///
111 /// # Arguments
112 ///
113 /// * `content` - The raw JSON string to parse
114 ///
115 /// # Returns
116 ///
117 /// A parsed `Crate` structure containing all documentation data.
118 ///
119 /// # Errors
120 ///
121 /// Returns `Error::JsonParse` if the JSON is invalid or doesn't match
122 /// the expected rustdoc JSON schema.
123 ///
124 /// # Schema Compatibility
125 ///
126 /// The `rustdoc-types` crate version must match the rustdoc JSON format
127 /// version. Mismatches can cause parsing failures or missing fields.
128 pub fn parse_json_string(content: &str) -> Result<Crate, Error> {
129 // Deserialize the JSON into the Crate type from rustdoc-types.
130 // This validates the structure against the expected schema.
131 let krate: Crate = serde_json::from_str(content).map_err(Error::JsonParse)?;
132
133 Ok(krate)
134 }
135}