cargo_docs_md/parser.rs
1//! Rustdoc JSON parsing module.
2//!
3//! This module handles loading and parsing rustdoc JSON files into the
4//! `rustdoc_types::Crate` structure that represents the entire documented crate.
5//!
6//! # Rustdoc JSON Format
7//!
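//! Rustdoc JSON is generated by running (the JSON backend is unstable, so a
//! nightly toolchain is required):
//! ```bash
//! RUSTDOCFLAGS="-Z unstable-options --output-format json" cargo +nightly doc
//! ```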
12//!
13//! The output is a single JSON file at `target/doc/{crate_name}.json` containing:
14//! - The crate's module hierarchy
15//! - All public (and optionally private) items
16//! - Documentation strings
17//! - Type information and generics
18//! - Cross-reference links between items
19//!
20//! # Key Types
21//!
22//! The parsed `Crate` contains:
23//! - `root`: ID of the root module
24//! - `index`: `HashMap` of all items by their ID
25//! - `paths`: `HashMap` mapping IDs to their full module paths
26//! - `crate_version`: Optional version string
27//!
28//! # Performance
29//!
30//! When the `simd-json` feature is enabled, parsing uses SIMD-accelerated
31//! JSON parsing which is significantly faster for large rustdoc JSON files
32//! (10-50MB+). This requires AVX2/SSE4.2 on x86 platforms.
33//!
34//! # Memory Usage
35//!
36//! The entire rustdoc JSON file is loaded into memory and deserialized into
37//! a `Crate` structure. For typical crates (1-20MB JSON), this works well.
38//!
//! For very large crates (e.g., `aws_sdk_ec2` at ~500MB), memory usage will be:
//! - JSON file size in memory during parsing
//! - Plus the deserialized `Crate` structure (usually similar size)
//! - With the `simd-json` feature, plus a mutable copy of the input bytes
//!   made for the parser
//! - Peak memory ≈ 2x JSON file size (≈ 3x with `simd-json`)
43//!
//! Future optimization: For extremely large crates, the file could be
//! deserialized straight from a buffered reader (`serde_json::from_reader`)
//! instead of first loading it into a `String`, trading some simplicity and
//! parse speed for lower peak memory.
46
47use fs_err as FileSystemError;
48use rustdoc_types::Crate;
49use tracing::instrument;
50
51use crate::error::Error;
52
/// Parser for rustdoc JSON files.
///
/// A stateless unit type that namespaces the functions for loading and
/// parsing rustdoc JSON, from files or from in-memory strings, into the
/// `rustdoc_types::Crate` structure.
#[derive(Debug, Clone, Copy, Default)]
pub struct Parser;
58
59impl Parser {
60 /// Parse a rustdoc JSON file from disk into a `Crate` structure.
61 ///
62 /// This is the primary entry point for loading documentation data.
63 /// The file should be generated with `cargo doc --output-format json`.
64 /// Parse a JSON string into a Crate structure.
65 ///
66 /// # Errors
67 /// Returns an error if it faills to parse the JSON.
68 #[instrument(skip(json), fields(json_len = json.len()))]
69 pub fn parse_json(json: &str) -> Result<Crate, Error> {
70 tracing::info!("Starting JSON parsing");
71
72 #[cfg(feature = "simd-json")]
73 let result = {
74 tracing::debug!("Using simd-json parser");
75
76 let mut json_bytes = json.as_bytes().to_vec();
77 simd_json::from_slice::<Crate>(&mut json_bytes).map_err(Error::SimdJsonParse)
78 };
79
80 #[cfg(not(feature = "simd-json"))]
81 let result: Result<Crate, Error> = {
82 tracing::debug!("Using serde_json parser");
83
84 serde_json::from_str(json).map_err(Error::JsonParse)
85 };
86
87 match &result {
88 Ok(krate) => {
89 tracing::info!(
90 crate_name = ?krate.index.get(&krate.root).and_then(|i| i.name.as_ref()),
91 item_count = krate.index.len(),
92 "Successfully parsed crate"
93 );
94 },
95
96 Err(e) => {
97 tracing::warn!(error = %e, "Failed to parse JSON");
98 },
99 }
100
101 result
102 }
103
104 /// Parse a JSON file.
105 ///
106 /// # Errors
107 /// Returns error if fails to read the JSON file.
108 #[instrument(skip_all, fields(path = %path.as_ref().display()))]
109 pub fn parse_file(path: impl AsRef<std::path::Path>) -> Result<Crate, Error> {
110 let path = path.as_ref();
111 tracing::debug!("Reading file");
112
113 let json = FileSystemError::read_to_string(path).map_err(Error::FileRead)?;
114 tracing::debug!(bytes = json.len(), "File read successfully");
115
116 Self::parse_json(&json)
117 }
118
119 /// Parse a rustdoc JSON string into a `Crate` structure.
120 ///
121 /// This function is useful when the JSON content is already in memory
122 /// (e.g., fetched from a URL or embedded in tests).
123 ///
124 /// # Arguments
125 ///
126 /// * `content` - The raw JSON string to parse
127 ///
128 /// # Returns
129 ///
130 /// A parsed `Crate` structure containing all documentation data.
131 ///
132 /// # Errors
133 ///
134 /// Returns `Error::JsonParse` if the JSON is invalid or doesn't match
135 /// the expected rustdoc JSON schema.
136 ///
137 /// # Schema Compatibility
138 ///
139 /// The `rustdoc-types` crate version must match the rustdoc JSON format
140 /// version. Mismatches can cause parsing failures or missing fields.
141 pub fn parse_json_string(content: &str) -> Result<Crate, Error> {
142 // Deserialize the JSON into the Crate type from rustdoc-types.
143 // This validates the structure against the expected schema.
144 let krate: Crate = serde_json::from_str(content).map_err(Error::JsonParse)?;
145
146 Ok(krate)
147 }
148}