cargo_docs_md/source/
parser.rs

1//! Source code parser using `syn`.
2//!
3//! This module provides parsing of Rust source files to extract
4//! information that is not available in rustdoc JSON, such as
5//! function bodies, private items, and implementation details.
6
7#![expect(
8    clippy::unused_self,
9    reason = "Will probably be needed for future work."
10)]
11
12use std::fs as StdFs;
13use std::path::{Path, PathBuf};
14use std::string::ToString;
15
16use quote::ToTokens;
17use syn::spanned::Spanned;
18use syn::{
19    Attribute, Expr, Fields, File, ImplItem, Item, ItemConst, ItemEnum, ItemFn, ItemImpl, ItemMod,
20    ItemStatic, ItemStruct, ItemTrait, ItemType, Lit, Meta, StaticMutability, Visibility,
21};
22
23use super::types::{
24    ConstInfo, CrateSource, EnumInfo, FieldInfo, FunctionInfo, ImplInfo, MacroInfo, StaticInfo,
25    StructInfo, TraitInfo, TypeAliasInfo, VariantInfo,
26};
27use crate::error::Error;
28
/// Walks the Rust sources of a single crate with `syn` and collects item
/// metadata into a `CrateSource`.
#[derive(Debug, Default)]
#[expect(clippy::struct_field_names, reason = "Not really an issue.")]
pub struct SourceParser {
    /// Name of the crate; `parse_crate` also uses it as the root module path.
    crate_name: String,

    /// Version string of the crate, copied verbatim into the parse result.
    crate_version: String,

    /// Directory in which the crate's entry point is searched for.
    crate_root: PathBuf,
}
42
43impl SourceParser {
44    /// Create a new source parser for a crate.
45    #[must_use]
46    pub const fn new(name: String, version: String, root_path: PathBuf) -> Self {
47        Self {
48            crate_name: name,
49            crate_version: version,
50            crate_root: root_path,
51        }
52    }
53
54    /// Parse an entire crate starting from its root.
55    ///
56    /// # Errors
57    ///
58    /// Returns an error if any source file cannot be parsed.
59    pub fn parse_crate(&self) -> Result<CrateSource, Error> {
60        let mut source = CrateSource::new(
61            self.crate_name.clone(),
62            self.crate_version.clone(),
63            self.crate_root.clone(),
64        );
65
66        // Find the entry point (lib.rs or main.rs)
67        let entry_point = self.find_entry_point()?;
68
69        // Parse starting from the entry point
70        self.parse_module_file(&entry_point, &self.crate_name, &mut source)?;
71
72        Ok(source)
73    }
74
75    /// Find the crate entry point (lib.rs or main.rs).
76    fn find_entry_point(&self) -> Result<PathBuf, Error> {
77        let src_dir = self.crate_root.join("src");
78
79        // Try lib.rs first (library crate)
80        let lib_rs = src_dir.join("lib.rs");
81
82        if lib_rs.exists() {
83            return Ok(lib_rs);
84        }
85
86        // Try main.rs (binary crate)
87        let main_rs = src_dir.join("main.rs");
88
89        if main_rs.exists() {
90            return Ok(main_rs);
91        }
92
93        // Try lib.rs in crate root (some crates don't use src/)
94        let root_lib = self.crate_root.join("lib.rs");
95
96        if root_lib.exists() {
97            return Ok(root_lib);
98        }
99
100        Err(Error::SourceParser(format!(
101            "No entry point found for crate at {}",
102            self.crate_root.display()
103        )))
104    }
105
106    /// Parse a single module file and its submodules.
107    fn parse_module_file(
108        &self,
109        path: &Path,
110        module_path: &str,
111        source: &mut CrateSource,
112    ) -> Result<(), Error> {
113        let content = StdFs::read_to_string(path)
114            .map_err(|e| Error::SourceParser(format!("Failed to read {}: {e}", path.display())))?;
115
116        let file = syn::parse_file(&content)
117            .map_err(|e| Error::SourceParser(format!("Failed to parse {}: {e}", path.display())))?;
118
119        self.process_file(&file, path, module_path, source)?;
120
121        Ok(())
122    }
123
124    /// Process a parsed file, extracting items and following submodules.
125    fn process_file(
126        &self,
127        file: &File,
128        file_path: &Path,
129        module_path: &str,
130        source: &mut CrateSource,
131    ) -> Result<(), Error> {
132        for item in &file.items {
133            self.process_item(item, file_path, module_path, source)?;
134        }
135
136        Ok(())
137    }
138
139    /// Process a single item from a file.
140    fn process_item(
141        &self,
142        item: &Item,
143        file_path: &Path,
144        module_path: &str,
145        source: &mut CrateSource,
146    ) -> Result<(), Error> {
147        match item {
148            Item::Fn(func) => {
149                source
150                    .functions
151                    .push(self.extract_function(func, file_path, module_path));
152            },
153
154            Item::Struct(s) => {
155                source
156                    .structs
157                    .push(self.extract_struct(s, file_path, module_path));
158            },
159
160            Item::Enum(e) => {
161                source
162                    .enums
163                    .push(self.extract_enum(e, file_path, module_path));
164            },
165
166            Item::Trait(t) => {
167                source
168                    .traits
169                    .push(self.extract_trait(t, file_path, module_path));
170            },
171
172            Item::Impl(impl_block) => {
173                source
174                    .impls
175                    .push(self.extract_impl(impl_block, file_path, module_path));
176            },
177
178            Item::Const(c) => {
179                source
180                    .constants
181                    .push(self.extract_const(c, file_path, module_path));
182            },
183
184            Item::Static(s) => {
185                source
186                    .statics
187                    .push(self.extract_static(s, file_path, module_path));
188            },
189
190            Item::Type(t) => {
191                source
192                    .type_aliases
193                    .push(self.extract_type_alias(t, file_path, module_path));
194            },
195
196            Item::Macro(m) => {
197                if let Some(ident) = &m.ident {
198                    source.macros.push(MacroInfo {
199                        name: ident.to_string(),
200                        module_path: module_path.to_string(),
201                        definition: m.to_token_stream().to_string(),
202                        doc_comments: Self::extract_doc_comments(&m.attrs),
203                        source_file: file_path.to_path_buf(),
204                        line_number: Self::line_of(m),
205                    });
206                }
207            },
208
209            Item::Mod(module) => {
210                self.process_module(module, file_path, module_path, source)?;
211            },
212
213            // Skip other items for now
214            _ => {},
215        }
216
217        Ok(())
218    }
219
220    /// Process a module declaration, potentially following to an external file.
221    fn process_module(
222        &self,
223        module: &ItemMod,
224        current_file: &Path,
225        parent_module_path: &str,
226        source: &mut CrateSource,
227    ) -> Result<(), Error> {
228        let module_name = module.ident.to_string();
229        let new_module_path = format!("{parent_module_path}::{module_name}");
230
231        if let Some((_, items)) = &module.content {
232            // Inline module - process items directly
233            for item in items {
234                self.process_item(item, current_file, &new_module_path, source)?;
235            }
236        } else {
237            // External module - find and parse the file
238            if let Some(module_file) = self.find_module_file(current_file, &module_name) {
239                self.parse_module_file(&module_file, &new_module_path, source)?;
240            }
241            // If module file not found, skip silently (might be cfg'd out)
242        }
243
244        Ok(())
245    }
246
247    /// Find the file for an external module declaration.
248    fn find_module_file(&self, current_file: &Path, module_name: &str) -> Option<PathBuf> {
249        let current_dir = current_file.parent()?;
250
251        // Check if current file is mod.rs or lib.rs/main.rs
252        let file_stem = current_file.file_stem()?.to_str()?;
253        let is_mod_file = file_stem == "mod" || file_stem == "lib" || file_stem == "main";
254
255        if is_mod_file {
256            // Look for module_name.rs in the same directory
257            let sibling = current_dir.join(format!("{module_name}.rs"));
258
259            if sibling.exists() {
260                return Some(sibling);
261            }
262
263            // Look for module_name/mod.rs
264            let subdir = current_dir.join(module_name).join("mod.rs");
265
266            if subdir.exists() {
267                return Some(subdir);
268            }
269        } else {
270            // Current file is something like foo.rs
271            // Look for foo/module_name.rs
272            let parent_dir = current_dir.join(file_stem);
273
274            let sibling = parent_dir.join(format!("{module_name}.rs"));
275
276            if sibling.exists() {
277                return Some(sibling);
278            }
279
280            // Look for foo/module_name/mod.rs
281            let subdir = parent_dir.join(module_name).join("mod.rs");
282
283            if subdir.exists() {
284                return Some(subdir);
285            }
286        }
287
288        None
289    }
290
291    /// Extract function information.
292    fn extract_function(&self, func: &ItemFn, file_path: &Path, module_path: &str) -> FunctionInfo {
293        FunctionInfo {
294            name: func.sig.ident.to_string(),
295            module_path: module_path.to_string(),
296            signature: func.sig.to_token_stream().to_string(),
297            body: func.block.to_token_stream().to_string(),
298            is_public: matches!(func.vis, Visibility::Public(_)),
299            doc_comments: Self::extract_doc_comments(&func.attrs),
300            source_file: file_path.to_path_buf(),
301            line_number: Self::line_of(func),
302        }
303    }
304
305    /// Extract struct information.
306    fn extract_struct(&self, s: &ItemStruct, file_path: &Path, module_path: &str) -> StructInfo {
307        StructInfo {
308            name: s.ident.to_string(),
309            module_path: module_path.to_string(),
310            definition: s.to_token_stream().to_string(),
311            is_public: matches!(s.vis, Visibility::Public(_)),
312            doc_comments: Self::extract_doc_comments(&s.attrs),
313            source_file: file_path.to_path_buf(),
314            line_number: Self::line_of(s),
315            fields: Self::extract_fields(&s.fields),
316        }
317    }
318
319    /// Extract enum information.
320    fn extract_enum(&self, e: &ItemEnum, file_path: &Path, module_path: &str) -> EnumInfo {
321        EnumInfo {
322            name: e.ident.to_string(),
323            module_path: module_path.to_string(),
324            definition: e.to_token_stream().to_string(),
325            is_public: matches!(e.vis, Visibility::Public(_)),
326            doc_comments: Self::extract_doc_comments(&e.attrs),
327            source_file: file_path.to_path_buf(),
328            line_number: Self::line_of(e),
329            variants: e
330                .variants
331                .iter()
332                .map(|v| VariantInfo {
333                    name: v.ident.to_string(),
334                    doc_comments: Self::extract_doc_comments(&v.attrs),
335                    fields: Self::extract_fields(&v.fields),
336                })
337                .collect(),
338        }
339    }
340
341    /// Extract trait information.
342    fn extract_trait(&self, t: &ItemTrait, file_path: &Path, module_path: &str) -> TraitInfo {
343        TraitInfo {
344            name: t.ident.to_string(),
345            module_path: module_path.to_string(),
346            definition: t.to_token_stream().to_string(),
347            is_public: matches!(t.vis, Visibility::Public(_)),
348            doc_comments: Self::extract_doc_comments(&t.attrs),
349            source_file: file_path.to_path_buf(),
350            line_number: Self::line_of(t),
351        }
352    }
353
354    /// Extract impl block information.
355    fn extract_impl(&self, impl_block: &ItemImpl, file_path: &Path, module_path: &str) -> ImplInfo {
356        let self_ty = impl_block.self_ty.to_token_stream().to_string();
357        let trait_name = impl_block
358            .trait_
359            .as_ref()
360            .map(|(_, path, _)| path.to_token_stream().to_string());
361
362        let methods = impl_block
363            .items
364            .iter()
365            .filter_map(|item| {
366                if let ImplItem::Fn(method) = item {
367                    Some(FunctionInfo {
368                        name: method.sig.ident.to_string(),
369                        module_path: module_path.to_string(),
370                        signature: method.sig.to_token_stream().to_string(),
371                        body: method.block.to_token_stream().to_string(),
372                        is_public: matches!(method.vis, Visibility::Public(_)),
373                        doc_comments: Self::extract_doc_comments(&method.attrs),
374                        source_file: file_path.to_path_buf(),
375                        line_number: Self::line_of(method),
376                    })
377                } else {
378                    None
379                }
380            })
381            .collect();
382
383        ImplInfo {
384            self_ty,
385            trait_name,
386            module_path: module_path.to_string(),
387            methods,
388            source_file: file_path.to_path_buf(),
389            line_number: Self::line_of(impl_block),
390        }
391    }
392
393    /// Extract constant information.
394    fn extract_const(&self, c: &ItemConst, file_path: &Path, module_path: &str) -> ConstInfo {
395        ConstInfo {
396            name: c.ident.to_string(),
397            module_path: module_path.to_string(),
398            ty: c.ty.to_token_stream().to_string(),
399            value: c.expr.to_token_stream().to_string(),
400            is_public: matches!(c.vis, Visibility::Public(_)),
401            doc_comments: Self::extract_doc_comments(&c.attrs),
402            source_file: file_path.to_path_buf(),
403            line_number: Self::line_of(c),
404        }
405    }
406
407    /// Extract static information.
408    fn extract_static(&self, s: &ItemStatic, file_path: &Path, module_path: &str) -> StaticInfo {
409        StaticInfo {
410            name: s.ident.to_string(),
411            module_path: module_path.to_string(),
412            ty: s.ty.to_token_stream().to_string(),
413            value: s.expr.to_token_stream().to_string(),
414            is_mutable: matches!(s.mutability, StaticMutability::Mut(_)),
415            is_public: matches!(s.vis, Visibility::Public(_)),
416            doc_comments: Self::extract_doc_comments(&s.attrs),
417            source_file: file_path.to_path_buf(),
418            line_number: Self::line_of(s),
419        }
420    }
421
422    /// Extract type alias information.
423    fn extract_type_alias(
424        &self,
425        t: &ItemType,
426        file_path: &Path,
427        module_path: &str,
428    ) -> TypeAliasInfo {
429        TypeAliasInfo {
430            name: t.ident.to_string(),
431            module_path: module_path.to_string(),
432            aliased_type: t.ty.to_token_stream().to_string(),
433            is_public: matches!(t.vis, Visibility::Public(_)),
434            doc_comments: Self::extract_doc_comments(&t.attrs),
435            source_file: file_path.to_path_buf(),
436            line_number: Self::line_of(t),
437        }
438    }
439
440    /// Extract the starting line number from a spanned item.
441    ///
442    /// Uses `proc-macro2`'s span-locations feature to get accurate line numbers.
443    fn line_of<T: Spanned>(item: &T) -> usize {
444        item.span().start().line
445    }
446
447    /// Extract doc comments from attributes.
448    ///
449    /// Doc comments in Rust are represented as `#[doc = "..."]` attributes.
450    fn extract_doc_comments(attrs: &[Attribute]) -> Vec<String> {
451        attrs
452            .iter()
453            .filter_map(|attr| {
454                if !attr.path().is_ident("doc") {
455                    return None;
456                }
457
458                // Try to extract the doc string from #[doc = "..."]
459                if let Meta::NameValue(nv) = &attr.meta
460                    && let Expr::Lit(expr_lit) = &nv.value
461                    && let Lit::Str(lit_str) = &expr_lit.lit
462                {
463                    return Some(lit_str.value());
464                }
465
466                None
467            })
468            .collect()
469    }
470
471    /// Extract field information from struct/enum fields.
472    fn extract_fields(fields: &Fields) -> Vec<FieldInfo> {
473        match fields {
474            Fields::Named(named) => named
475                .named
476                .iter()
477                .map(|f| FieldInfo {
478                    name: f.ident.as_ref().map(ToString::to_string),
479                    ty: f.ty.to_token_stream().to_string(),
480                    is_public: matches!(f.vis, Visibility::Public(_)),
481                    doc_comments: Self::extract_doc_comments(&f.attrs),
482                })
483                .collect(),
484
485            Fields::Unnamed(unnamed) => unnamed
486                .unnamed
487                .iter()
488                .enumerate()
489                .map(|(i, f)| FieldInfo {
490                    name: Some(format!("{i}")),
491                    ty: f.ty.to_token_stream().to_string(),
492                    is_public: matches!(f.vis, Visibility::Public(_)),
493                    doc_comments: Self::extract_doc_comments(&f.attrs),
494                })
495                .collect(),
496
497            Fields::Unit => Vec::new(),
498        }
499    }
500
501    /// Parse a single file without traversing modules.
502    ///
503    /// Useful for quick parsing of individual files.
504    ///
505    /// # Errors
506    ///
507    /// Returns an error if the file cannot be read or parsed.
508    pub fn parse_file(path: &Path) -> Result<File, Error> {
509        let content = StdFs::read_to_string(path)
510            .map_err(|e| Error::SourceParser(format!("Failed to read {}: {e}", path.display())))?;
511
512        syn::parse_file(&content)
513            .map_err(|e| Error::SourceParser(format!("Failed to parse {}: {e}", path.display())))
514    }
515}
516
517#[cfg(test)]
518mod tests {
519    use super::*;
520
521    #[test]
522    fn test_extract_doc_comments() {
523        let src = r"
524            /// This is a doc comment
525            /// with multiple lines
526            pub fn foo() {}
527        ";
528
529        let file = syn::parse_file(src).unwrap();
530        if let Item::Fn(func) = &file.items[0] {
531            let docs = SourceParser::extract_doc_comments(&func.attrs);
532
533            assert_eq!(docs.len(), 2);
534            assert_eq!(docs[0], " This is a doc comment");
535            assert_eq!(docs[1], " with multiple lines");
536        } else {
537            panic!("Expected function");
538        }
539    }
540
541    #[test]
542    fn test_extract_function() {
543        let src = r"
544            pub fn add(a: i32, b: i32) -> i32 {
545                a + b
546            }
547        ";
548
549        let file = syn::parse_file(src).unwrap();
550        let parser = SourceParser::new("test".into(), "0.1.0".into(), PathBuf::new());
551
552        if let Item::Fn(func) = &file.items[0] {
553            let info = parser.extract_function(func, Path::new("test.rs"), "crate");
554            assert_eq!(info.name, "add");
555            assert!(info.is_public);
556            assert!(info.signature.contains("fn add"));
557            assert!(info.body.contains("a + b"));
558        }
559    }
560
561    #[test]
562    fn test_extract_struct_fields() {
563        let src = r"
564            pub struct Point {
565                /// X coordinate
566                pub x: f64,
567                /// Y coordinate
568                pub y: f64,
569            }
570        ";
571
572        let file = syn::parse_file(src).unwrap();
573        let parser = SourceParser::new("test".into(), "0.1.0".into(), PathBuf::new());
574
575        if let Item::Struct(s) = &file.items[0] {
576            let info = parser.extract_struct(s, Path::new("test.rs"), "crate");
577            assert_eq!(info.name, "Point");
578            assert_eq!(info.fields.len(), 2);
579            assert_eq!(info.fields[0].name, Some("x".to_string()));
580            assert!(info.fields[0].doc_comments[0].contains("X coordinate"));
581        }
582    }
583
584    #[test]
585    fn test_line_numbers_function() {
586        // Line 1 is the function
587        let src = "pub fn foo() {}";
588
589        let file = syn::parse_file(src).unwrap();
590        let parser = SourceParser::new("test".into(), "0.1.0".into(), PathBuf::new());
591
592        if let Item::Fn(func) = &file.items[0] {
593            let info = parser.extract_function(func, Path::new("test.rs"), "crate");
594            assert_eq!(info.line_number, 1, "Function should be on line 1");
595        } else {
596            panic!("Expected function");
597        }
598    }
599
600    #[test]
601    fn test_line_numbers_multiple_items() {
602        let src = r"pub fn first() {}
603
604pub struct Second;
605
606pub enum Third { A, B }
607
608pub const FOURTH: i32 = 42;
609";
610
611        let file = syn::parse_file(src).unwrap();
612        let parser = SourceParser::new("test".into(), "0.1.0".into(), PathBuf::new());
613
614        // Function on line 1
615        if let Item::Fn(func) = &file.items[0] {
616            let info = parser.extract_function(func, Path::new("test.rs"), "crate");
617            assert_eq!(info.line_number, 1, "first() should be on line 1");
618        }
619
620        // Struct on line 3
621        if let Item::Struct(s) = &file.items[1] {
622            let info = parser.extract_struct(s, Path::new("test.rs"), "crate");
623            assert_eq!(info.line_number, 3, "Second should be on line 3");
624        }
625
626        // Enum on line 5
627        if let Item::Enum(e) = &file.items[2] {
628            let info = parser.extract_enum(e, Path::new("test.rs"), "crate");
629            assert_eq!(info.line_number, 5, "Third should be on line 5");
630        }
631
632        // Const on line 7
633        if let Item::Const(c) = &file.items[3] {
634            let info = parser.extract_const(c, Path::new("test.rs"), "crate");
635            assert_eq!(info.line_number, 7, "FOURTH should be on line 7");
636        }
637    }
638
639    #[test]
640    fn test_line_numbers_impl_block() {
641        let src = r"struct Foo;
642
643impl Foo {
644    pub fn method_one(&self) {}
645
646    pub fn method_two(&self) {}
647}
648";
649
650        let file = syn::parse_file(src).unwrap();
651        let parser = SourceParser::new("test".into(), "0.1.0".into(), PathBuf::new());
652
653        if let Item::Impl(impl_block) = &file.items[1] {
654            let info = parser.extract_impl(impl_block, Path::new("test.rs"), "crate");
655
656            // impl block starts on line 3
657            assert_eq!(info.line_number, 3, "impl block should be on line 3");
658
659            // Methods have their own line numbers
660            assert_eq!(info.methods.len(), 2);
661            assert_eq!(
662                info.methods[0].line_number, 4,
663                "method_one should be on line 4"
664            );
665            assert_eq!(
666                info.methods[1].line_number, 6,
667                "method_two should be on line 6"
668            );
669        } else {
670            panic!("Expected impl block");
671        }
672    }
673
674    #[test]
675    fn test_line_numbers_trait() {
676        let src = r"
677/// A trait
678pub trait MyTrait {
679    fn required(&self);
680}
681";
682
683        let file = syn::parse_file(src).unwrap();
684        let parser = SourceParser::new("test".into(), "0.1.0".into(), PathBuf::new());
685
686        if let Item::Trait(t) = &file.items[0] {
687            let info = parser.extract_trait(t, Path::new("test.rs"), "crate");
688            // Doc comment is on line 2, trait keyword on line 3
689            assert_eq!(
690                info.line_number, 2,
691                "Trait should start on line 2 (doc comment)"
692            );
693        } else {
694            panic!("Expected trait");
695        }
696    }
697
698    #[test]
699    fn test_line_numbers_static_and_type_alias() {
700        let src = r"pub static FOO: i32 = 1;
701
702pub type Bar = Vec<String>;
703";
704
705        let file = syn::parse_file(src).unwrap();
706        let parser = SourceParser::new("test".into(), "0.1.0".into(), PathBuf::new());
707
708        if let Item::Static(s) = &file.items[0] {
709            let info = parser.extract_static(s, Path::new("test.rs"), "crate");
710            assert_eq!(info.line_number, 1, "static FOO should be on line 1");
711        }
712
713        if let Item::Type(t) = &file.items[1] {
714            let info = parser.extract_type_alias(t, Path::new("test.rs"), "crate");
715            assert_eq!(info.line_number, 3, "type Bar should be on line 3");
716        }
717    }
718}