cargo_docs_md/
parser.rs

1//! Rustdoc JSON parsing module.
2//!
3//! This module handles loading and parsing rustdoc JSON files into the
4//! `rustdoc_types::Crate` structure that represents the entire documented crate.
5//!
6//! # Rustdoc JSON Format
7//!
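//! Rustdoc's JSON output is still an unstable rustdoc feature, so it has to be
//! generated with a nightly toolchain:
//! ```bash
//! RUSTDOCFLAGS="-Z unstable-options --output-format json" cargo +nightly doc
//! ```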
12//!
13//! The output is a single JSON file at `target/doc/{crate_name}.json` containing:
14//! - The crate's module hierarchy
15//! - All public (and optionally private) items
16//! - Documentation strings
17//! - Type information and generics
18//! - Cross-reference links between items
19//!
20//! # Key Types
21//!
22//! The parsed `Crate` contains:
23//! - `root`: ID of the root module
24//! - `index`: `HashMap` of all items by their ID
25//! - `paths`: `HashMap` mapping IDs to their full module paths
26//! - `crate_version`: Optional version string
27//!
28//! # Performance
29//!
30//! When the `simd-json` feature is enabled, parsing uses SIMD-accelerated
31//! JSON parsing which is significantly faster for large rustdoc JSON files
32//! (10-50MB+). This requires AVX2/SSE4.2 on x86 platforms.
33//!
34//! # Memory Usage
35//!
36//! The entire rustdoc JSON file is loaded into memory and deserialized into
37//! a `Crate` structure. For typical crates (1-20MB JSON), this works well.
38//!
39//! For very large crates (e.g., `aws_sdk_ec2` at ~500MB), memory usage will be:
40//! - JSON file size in memory during parsing
41//! - Plus the deserialized `Crate` structure (usually similar size)
42//! - Peak memory ≈ 2x JSON file size
43//!
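//! Future optimization: For extremely large crates, deserializing directly from a
//! buffered reader (`serde_json::from_reader`) would avoid holding the whole JSON
//! text in memory, trading some parsing speed for lower peak memory. Note that
//! `serde_json::StreamDeserializer` does not help here: it iterates over several
//! top-level JSON values, whereas rustdoc JSON is a single large object.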
46
47use fs_err as FileSystemError;
48use rustdoc_types::Crate;
49use tracing::instrument;
50
51use crate::error::Error;
52
/// Parser for rustdoc JSON files.
///
/// A stateless unit type that namespaces the parsing entry points: its
/// associated functions load rustdoc JSON from a file or from an in-memory
/// string and deserialize it into the `rustdoc_types::Crate` structure.
#[derive(Debug, Clone, Copy, Default)]
pub struct Parser;
58
59impl Parser {
60    /// Parse a rustdoc JSON file from disk into a `Crate` structure.
61    ///
62    /// This is the primary entry point for loading documentation data.
63    /// The file should be generated with `cargo doc --output-format json`.
64    /// Parse a JSON string into a Crate structure.
65    ///
66    /// # Errors
67    /// Returns an error if it faills to parse the JSON.
68    #[instrument(skip(json), fields(json_len = json.len()))]
69    pub fn parse_json(json: &str) -> Result<Crate, Error> {
70        tracing::info!("Starting JSON parsing");
71
72        #[cfg(feature = "simd-json")]
73        let result = {
74            tracing::debug!("Using simd-json parser");
75
76            let mut json_bytes = json.as_bytes().to_vec();
77            simd_json::from_slice::<Crate>(&mut json_bytes).map_err(Error::SimdJsonParse)
78        };
79
80        #[cfg(not(feature = "simd-json"))]
81        let result: Result<Crate, Error> = {
82            tracing::debug!("Using serde_json parser");
83
84            serde_json::from_str(json).map_err(Error::JsonParse)
85        };
86
87        match &result {
88            Ok(krate) => {
89                tracing::info!(
90                    crate_name = ?krate.index.get(&krate.root).and_then(|i| i.name.as_ref()),
91                    item_count = krate.index.len(),
92                    "Successfully parsed crate"
93                );
94            },
95
96            Err(e) => {
97                tracing::warn!(error = %e, "Failed to parse JSON");
98            },
99        }
100
101        result
102    }
103
104    /// Parse a JSON file.
105    ///
106    /// # Errors
107    /// Returns error if fails to read the JSON file.
108    #[instrument(skip_all, fields(path = %path.as_ref().display()))]
109    pub fn parse_file(path: impl AsRef<std::path::Path>) -> Result<Crate, Error> {
110        let path = path.as_ref();
111        tracing::debug!("Reading file");
112
113        let json = FileSystemError::read_to_string(path).map_err(Error::FileRead)?;
114        tracing::debug!(bytes = json.len(), "File read successfully");
115
116        Self::parse_json(&json)
117    }
118
119    /// Parse a rustdoc JSON string into a `Crate` structure.
120    ///
121    /// This function is useful when the JSON content is already in memory
122    /// (e.g., fetched from a URL or embedded in tests).
123    ///
124    /// # Arguments
125    ///
126    /// * `content` - The raw JSON string to parse
127    ///
128    /// # Returns
129    ///
130    /// A parsed `Crate` structure containing all documentation data.
131    ///
132    /// # Errors
133    ///
134    /// Returns `Error::JsonParse` if the JSON is invalid or doesn't match
135    /// the expected rustdoc JSON schema.
136    ///
137    /// # Schema Compatibility
138    ///
139    /// The `rustdoc-types` crate version must match the rustdoc JSON format
140    /// version. Mismatches can cause parsing failures or missing fields.
141    pub fn parse_json_string(content: &str) -> Result<Crate, Error> {
142        // Deserialize the JSON into the Crate type from rustdoc-types.
143        // This validates the structure against the expected schema.
144        let krate: Crate = serde_json::from_str(content).map_err(Error::JsonParse)?;
145
146        Ok(krate)
147    }
148}