// traverse_graph/natspec/mod.rs

/*
    This module is responsible for parsing NatSpec documentation comments
    commonly found in Solidity source code. NatSpec comments provide a
    standardized way to document contracts, functions, parameters, return
    values, and other code elements.

    The primary functionality includes:
    - Defining data structures (`NatSpecKind`, `NatSpecItem`, `NatSpec`)
      to represent the parsed NatSpec information.
    - Implementing `nom` parsers to break down raw comment strings (both
      single-line `///` and multi-line `/** ... */`) into these
      structured types.
    - Handling various NatSpec tags like `@title`, `@author`, `@notice`,
      `@dev`, `@param`, `@return`, `@inheritdoc`, and custom tags
      (`@custom:...`).
    - Providing utility functions on the `NatSpec` struct to query and
      manipulate the parsed documentation, such as populating return item
      names and counting specific tag occurrences.

    The main entry point for parsing is the `parse_natspec_comment` function,
    which takes a raw comment string and attempts to parse it into a
    `NatSpec` struct.
*/
use nom::{
    branch::alt,
    bytes::complete::{tag, take_while1},
    character::complete::{
        anychar, char, line_ending, multispace0, not_line_ending, space0, space1,
    },
    combinator::{cut, map, not, opt, peek, recognize},
    multi::{many0, many0_count, separated_list0},
    sequence::{delimited, pair, preceded},
    IResult, Parser,
};
use serde::{Deserialize, Serialize};
use std::ops::Range;
37
38pub mod extract;
39
40#[derive(Default, Copy, Clone, PartialEq, Eq, Debug, Hash, Serialize, Deserialize)]
41pub struct TextIndex {
42    pub utf8: usize,
43    pub line: usize,
44    pub column: usize,
45}
46
47pub type TextRange = Range<TextIndex>;
48
49#[derive(Debug, Clone, PartialEq, Eq)]
50pub struct Identifier {
51    pub name: Option<String>,
52    pub span: TextRange,
53}
54
/// The tag that introduces a NatSpec item.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum NatSpecKind {
    /// `@title`
    Title,
    /// `@author`
    Author,
    /// `@notice` (also used by the parsers for text that carries no tag).
    Notice,
    /// `@dev`
    Dev,
    /// `@param <name>`
    Param { name: String },
    /// `@return`; `name` stays `None` until a return name has been matched.
    Return { name: Option<String> },
    /// `@inheritdoc <parent>`
    Inheritdoc { parent: String },
    /// `@custom:<tag>`
    Custom { tag: String },
}

impl NatSpecKind {
    /// Returns `true` for any `Param` variant, whatever its name.
    pub fn is_param(&self) -> bool {
        matches!(self, NatSpecKind::Param { .. })
    }

    /// Returns `true` for any `Return` variant, named or unnamed.
    pub fn is_return(&self) -> bool {
        matches!(self, NatSpecKind::Return { .. })
    }

    /// Returns `true` only for `Notice`.
    pub fn is_notice(&self) -> bool {
        matches!(self, NatSpecKind::Notice)
    }

    /// Returns `true` only for `Dev`.
    pub fn is_dev(&self) -> bool {
        matches!(self, NatSpecKind::Dev)
    }
}
81
82#[derive(Debug, Clone, PartialEq, Eq)]
83pub struct NatSpecItem {
84    pub kind: NatSpecKind,
85    pub comment: String,
86}
87
88impl NatSpecItem {
89    pub fn populate_return(&mut self, returns: &[Identifier]) {
90        if !matches!(self.kind, NatSpecKind::Return { name: _ }) {
91            return;
92        }
93        
94        // If already populated with a name, don't reprocess
95        if let NatSpecKind::Return { name: Some(_) } = &self.kind {
96            return;
97        }
98        
99        let name = self
100            .comment
101            .split_whitespace()
102            .next()
103            .filter(|first_word| {
104                returns.iter().any(|r| match &r.name {
105                    Some(name) => first_word == name,
106                    None => false,
107                })
108            })
109            .map(ToOwned::to_owned);
110
111        if let Some(name_val) = &name {
112            if let Some(stripped_comment) = self.comment.strip_prefix(name_val) {
113                self.comment = stripped_comment.trim_start().to_string();
114            }
115        }
116        self.kind = NatSpecKind::Return { name };
117    }
118
119    #[must_use]
120    pub fn is_empty(&self) -> bool {
121        self.kind == NatSpecKind::Notice && self.comment.is_empty()
122    }
123}
124
125#[derive(Debug, Clone, Default, PartialEq, Eq)]
126pub struct NatSpec {
127    pub items: Vec<NatSpecItem>,
128}
129
130impl NatSpec {
131    pub fn append(&mut self, other: &mut Self) {
132        self.items.append(&mut other.items);
133    }
134
135    #[must_use]
136    pub fn populate_returns(mut self, returns: &[Identifier]) -> Self {
137        for i in &mut self.items {
138            i.populate_return(returns);
139        }
140        self
141    }
142
143    #[must_use]
144    pub fn count_param(&self, ident: &Identifier) -> usize {
145        let Some(ident_name) = &ident.name else {
146            return 0;
147        };
148        self.items
149            .iter()
150            .filter(|n| match &n.kind {
151                NatSpecKind::Param { name } => name == ident_name,
152                _ => false,
153            })
154            .count()
155    }
156
157    #[must_use]
158    pub fn count_return(&self, ident: &Identifier) -> usize {
159        let Some(ident_name) = &ident.name else {
160            return 0;
161        };
162        self.items
163            .iter()
164            .filter(|n| match &n.kind {
165                NatSpecKind::Return { name: Some(name) } => name == ident_name,
166                _ => false,
167            })
168            .count()
169    }
170
171    #[must_use]
172    pub fn count_unnamed_returns(&self) -> usize {
173        self.items
174            .iter()
175            .filter(|n| matches!(&n.kind, NatSpecKind::Return { name: None }))
176            .count()
177    }
178
179    #[must_use]
180    pub fn count_all_returns(&self) -> usize {
181        self.items.iter().filter(|n| n.kind.is_return()).count()
182    }
183
184    #[must_use]
185    pub fn has_param(&self) -> bool {
186        self.items.iter().any(|n| n.kind.is_param())
187    }
188
189    #[must_use]
190    pub fn has_return(&self) -> bool {
191        self.items.iter().any(|n| n.kind.is_return())
192    }
193
194    #[must_use]
195    pub fn has_notice(&self) -> bool {
196        self.items.iter().any(|n| n.kind.is_notice())
197    }
198
199    #[must_use]
200    pub fn has_dev(&self) -> bool {
201        self.items.iter().any(|n| n.kind.is_dev())
202    }
203}
204
205impl From<NatSpecItem> for NatSpec {
206    fn from(value: NatSpecItem) -> Self {
207        Self { items: vec![value] }
208    }
209}
210
/// Returns an owned copy of `input` with leading and trailing whitespace removed.
fn trim_str(input: &str) -> String {
    input.trim().to_string()
}
214
215fn parse_identifier_str(input: &str) -> IResult<&str, String> {
216    let mut parser = map(take_while1(|c: char| !c.is_whitespace()), |s: &str| {
217        s.to_string()
218    });
219    parser.parse(input)
220}
221
222fn parse_natspec_kind(input: &str) -> IResult<&str, NatSpecKind> {
223    let mut parser = alt((
224        map(tag("@title"), |_| NatSpecKind::Title),
225        map(tag("@author"), |_| NatSpecKind::Author),
226        map(tag("@notice"), |_| NatSpecKind::Notice),
227        map(tag("@dev"), |_| NatSpecKind::Dev),
228        map(
229            preceded(pair(tag("@param"), space1), parse_identifier_str),
230            |name| NatSpecKind::Param { name },
231        ),
232        map(tag("@return"), |_| NatSpecKind::Return { name: None }),
233        map(
234            preceded(pair(tag("@inheritdoc"), space1), parse_identifier_str),
235            |parent| NatSpecKind::Inheritdoc { parent },
236        ),
237        map(
238            preceded(tag("@custom:"), parse_identifier_str),
239            |tag_name| NatSpecKind::Custom { tag: tag_name },
240        ),
241    ));
242    parser.parse(input)
243}
244
245fn parse_comment_text(input: &str) -> IResult<&str, String> {
246    let mut parser = map(not_line_ending, trim_str);
247    parser.parse(input)
248}
249
250fn parse_multiline_comment_text(input: &str) -> IResult<&str, String> {
251    let mut parser = map(
252        recognize(many0(preceded(
253            not(peek(alt((line_ending, tag("*/"))))),
254            anychar,
255        ))),
256        |s: &str| s.trim().to_string(),
257    );
258    parser.parse(input)
259}
260
261fn parse_one_multiline_natspec_item(input: &str) -> IResult<&str, NatSpecItem> {
262    // First check if we're at the closing part of the comment
263    if input.trim_start().starts_with("*/") {
264        return Err(nom::Err::Error(nom::error::Error::new(
265            input,
266            nom::error::ErrorKind::Char,
267        )));
268    }
269
270    let (remaining_input, (_lead_space_consumed, _star_opt, _mid_space_consumed, kind_opt, _trail_space_consumed, comment_str)) = (
271        space0,
272        opt(many0(char('*'))),  // Changed to consume multiple asterisks
273        space0,
274        opt(parse_natspec_kind),
275        space0,
276        parse_multiline_comment_text,
277    ).parse(input)?;
278
279    let item = NatSpecItem {
280        kind: kind_opt.unwrap_or(NatSpecKind::Notice),
281        comment: comment_str,
282    };
283
284    Ok((remaining_input, item))
285}
286
287fn parse_multiline_comment(input: &str) -> IResult<&str, NatSpec> {
288    // First check if input starts with /*** which is invalid
289    if input.starts_with("/***") {
290        return Err(nom::Err::Error(nom::error::Error::new(
291            input,
292            nom::error::ErrorKind::Tag,
293        )));
294    }
295    
296    let mut parser = map(
297        delimited(
298            // Changed multispace0 to space0 after tag("/**").
299            // space0 will consume spaces/tabs on the same line as "/**", but not a newline.
300            // If there's a newline after "/**", the first parse_one_multiline_natspec_item's
301            // leading space0 or the separated_list0's line_ending logic will handle it.
302            (tag("/**"), space0),
303            separated_list0(line_ending, parse_one_multiline_natspec_item),
304            preceded(multispace0, tag("*/")),
305        ),
306        |items| {
307            // Filter out any completely empty NatSpecItems (Notice with empty comment)
308            // that might arise from lines like " * " or the final " */" if not handled by line_ending.
309            let filtered_items = items.into_iter().filter(|item| !item.is_empty()).collect();
310            NatSpec { items: filtered_items }
311        },
312    );
313    parser.parse(input)
314}
315
316fn parse_empty_multiline_comment(input: &str) -> IResult<&str, NatSpec> {
317    // Match /**/ or /** */ but not /***/ or similar
318    let mut parser = map(
319        preceded(
320            tag("/**"),
321            preceded(space0, tag("*/"))
322        ),
323        |_| NatSpec::default(),
324    );
325    parser.parse(input)
326}
327
328fn parse_single_line_natspec_item(input: &str) -> IResult<&str, NatSpecItem> {
329    let mut parser = map(
330        (space0, opt(parse_natspec_kind), space0, parse_comment_text),
331        |(_, kind_opt, _, comment_str)| NatSpecItem {
332            kind: kind_opt.unwrap_or(NatSpecKind::Notice),
333            comment: comment_str,
334        },
335    );
336    parser.parse(input)
337}
338
339fn parse_single_line_comment(input: &str) -> IResult<&str, NatSpec> {
340    let mut parser = map(
341        preceded(
342            (tag("///"), cut(not(char('/')))),
343            parse_single_line_natspec_item,
344        ),
345        |item| {
346            if item.is_empty() {
347                NatSpec::default()
348            } else {
349                NatSpec { items: vec![item] }
350            }
351        },
352    );
353    parser.parse(input)
354}
355
356fn do_parse_natspec_comment(input: &str) -> IResult<&str, NatSpec> {
357    let trimmed_input = input.trim();
358    let mut parser = alt((
359        parse_single_line_comment,
360        parse_multiline_comment,
361        parse_empty_multiline_comment,
362    ));
363    parser.parse(trimmed_input)
364}
365
366/// Parses a raw Natspec comment string into a structured `NatSpec` object.
367///
368/// This function handles both single-line (`///`) and multi-line (`/** ... */`)
369/// Natspec comments. It trims the input string before parsing.
370///
371/// # Arguments
372///
373/// * `input`: A string slice representing the raw Natspec comment.
374///
375/// # Returns
376///
377/// * `anyhow::Result<NatSpec>`: A result containing the parsed `NatSpec` on success,
378///   or an `anyhow::Error` if parsing fails.
379pub fn parse_natspec_comment(input: &str) -> anyhow::Result<NatSpec> {
380    use nom::Finish; // Keep Finish scoped to this function
381    match do_parse_natspec_comment(input).finish() {
382        Ok((_, natspec)) => Ok(natspec),
383        Err(e) => {
384            // Use a simpler error message approach that doesn't rely on convert_error
385            Err(anyhow::anyhow!(
386                "Failed to parse Natspec comment: {}",
387                e
388            ))
389        }
390    }
391}
392
#[cfg(test)]
mod tests {
    use super::*;
    use nom::Finish;

    // An identifier is a run of non-whitespace characters.
    #[test]
    fn test_parse_identifier_str_parser() {
        assert_eq!(
            parse_identifier_str("foo bar"),
            Ok((" bar", "foo".to_string()))
        );
        assert_eq!(parse_identifier_str("foo"), Ok(("", "foo".to_string())));
    }

    // Every supported tag maps to its `NatSpecKind`.
    #[test]
    fn test_natspec_kind_parser() {
        assert_eq!(parse_natspec_kind("@title"), Ok(("", NatSpecKind::Title)));
        assert_eq!(parse_natspec_kind("@author"), Ok(("", NatSpecKind::Author)));
        assert_eq!(parse_natspec_kind("@notice"), Ok(("", NatSpecKind::Notice)));
        assert_eq!(parse_natspec_kind("@dev"), Ok(("", NatSpecKind::Dev)));
        assert_eq!(
            parse_natspec_kind("@param foo"),
            Ok((
                "",
                NatSpecKind::Param {
                    name: "foo".to_string()
                }
            ))
        );
        assert_eq!(
            parse_natspec_kind("@return"),
            Ok(("", NatSpecKind::Return { name: None }))
        );
        assert_eq!(
            parse_natspec_kind("@inheritdoc ISome"),
            Ok((
                "",
                NatSpecKind::Inheritdoc {
                    parent: "ISome".to_string()
                }
            ))
        );
        assert_eq!(
            parse_natspec_kind("@custom:tagname"),
            Ok((
                "",
                NatSpecKind::Custom {
                    tag: "tagname".to_string()
                }
            ))
        );
    }

    // Single lines of a multi-line comment, with and without a leading asterisk.
    #[test]
    fn test_one_multiline_item_parser() {
        let cases = [
            ("* @dev Hello world", NatSpecKind::Dev, "Hello world"),
            (" @title The Title", NatSpecKind::Title, "The Title"),
            (
                "* @author McGyver <hi@buildanything.com>",
                NatSpecKind::Author,
                "McGyver <hi@buildanything.com>",
            ),
            (
                " @param foo The bar",
                NatSpecKind::Param {
                    name: "foo".to_string(),
                },
                "The bar",
            ),
            (
                " @return something The return value",
                NatSpecKind::Return { name: None },
                "something The return value",
            ),
            (
                "* @custom:foo bar",
                NatSpecKind::Custom {
                    tag: "foo".to_string(),
                },
                "bar",
            ),
            ("  lorem ipsum", NatSpecKind::Notice, "lorem ipsum"),
            ("lorem ipsum", NatSpecKind::Notice, "lorem ipsum"),
            ("*  foobar", NatSpecKind::Notice, "foobar"),
        ];
        for (input, kind, comment) in cases {
            let res = parse_one_multiline_natspec_item(input).finish();
            assert!(
                res.is_ok(),
                "Failed on input: '{}', Error: {:?}",
                input,
                res.err()
            );
            let (_, item) = res.unwrap();
            assert_eq!(item.kind, kind);
            assert_eq!(item.comment, comment.to_string());
        }
    }

    // `///` comments go through the public entry point and yield exactly one item.
    #[test]
    fn test_single_line_comment_parser() {
        let cases = [
            ("/// Foo bar", NatSpecKind::Notice, "Foo bar"),
            ("///  Baz", NatSpecKind::Notice, "Baz"),
            (
                "/// @notice  Hello world",
                NatSpecKind::Notice,
                "Hello world",
            ),
            (
                "/// @param foo This is bar",
                NatSpecKind::Param {
                    name: "foo".to_string(),
                },
                "This is bar",
            ),
            (
                "/// @return The return value",
                NatSpecKind::Return { name: None },
                "The return value",
            ),
            (
                "/// @custom:foo  This is bar",
                NatSpecKind::Custom {
                    tag: "foo".to_string(),
                },
                "This is bar",
            ),
        ];
        for (input, kind, comment) in cases {
            let res = parse_natspec_comment(input);
            assert!(
                res.is_ok(),
                "Failed on input: '{}', Error: {:?}",
                input,
                res.err()
            );
            let natspec = res.unwrap();
            assert_eq!(natspec.items.len(), 1);
            assert_eq!(natspec.items[0].kind, kind);
            assert_eq!(natspec.items[0].comment, comment.to_string());
        }
    }

    // A bare `///` (with or without trailing space) yields no items.
    #[test]
    fn test_single_line_empty() {
        let res = parse_natspec_comment("///");
        assert!(res.is_ok(), "{:?}", res.err());
        let natspec = res.unwrap();
        assert_eq!(natspec, NatSpec::default());

        let res = parse_natspec_comment("/// ");
        assert!(res.is_ok(), "{:?}", res.err());
        let natspec = res.unwrap();
        assert_eq!(natspec, NatSpec::default());
    }

    // Four slashes do not open a NatSpec comment.
    #[test]
    fn test_single_line_invalid_delimiter() {
        let res = parse_natspec_comment("//// Hello");
        assert!(res.is_err());
    }

    #[test]
    fn test_multiline_comment_parser() {
        let comment = "/**\n     * @notice Some notice text.\n     */";
        let res = parse_natspec_comment(comment);
        assert!(res.is_ok(), "{:?}", res.err());
        let natspec = res.unwrap();
        assert_eq!(natspec.items.len(), 1);
        assert_eq!(
            natspec.items[0],
            NatSpecItem {
                kind: NatSpecKind::Notice,
                comment: "Some notice text.".to_string()
            }
        );
    }

    #[test]
    fn test_multiline_two_items() {
        let comment = "/**\n     * @notice Some notice text.\n     * @custom:something\n     */";
        let res = parse_natspec_comment(comment);
        assert!(res.is_ok(), "{:?}", res.err());
        let natspec = res.unwrap();
        assert_eq!(natspec.items.len(), 2);
        assert_eq!(
            natspec.items[0],
            NatSpecItem {
                kind: NatSpecKind::Notice,
                comment: "Some notice text.".to_string()
            }
        );
        assert_eq!(
            natspec.items[1],
            NatSpecItem {
                kind: NatSpecKind::Custom {
                    tag: "something".to_string()
                },
                comment: "".to_string()
            }
        );
    }

    // Lines may start with no asterisk, one asterisk, or several.
    #[test]
    fn test_multiline_mixed_leading_asterisks() {
        let comment = "/** @notice First line.\n  Another line, no asterisk.\n\t* @param p The param\n ** @dev Dev comment */";
        let res = parse_natspec_comment(comment);
        assert!(res.is_ok(), "Input: '{}'\nError: {:?}", comment, res.err());
        let natspec = res.unwrap();

        assert_eq!(natspec.items.len(), 4);
        assert_eq!(
            natspec.items[0],
            NatSpecItem {
                kind: NatSpecKind::Notice,
                comment: "First line.".to_string()
            }
        );
        assert_eq!(
            natspec.items[1],
            NatSpecItem {
                kind: NatSpecKind::Notice,
                comment: "Another line, no asterisk.".to_string()
            }
        );
        assert_eq!(
            natspec.items[2],
            NatSpecItem {
                kind: NatSpecKind::Param {
                    name: "p".to_string()
                },
                comment: "The param".to_string()
            }
        );
        assert_eq!(
            natspec.items[3],
            NatSpecItem {
                kind: NatSpecKind::Dev,
                comment: "Dev comment".to_string()
            }
        );
    }

    // Comments without content parse to an empty `NatSpec`.
    #[test]
    fn test_multiline_empty_comment() {
        let comment = "/**\n        */";
        let res = parse_natspec_comment(comment);
        assert!(res.is_ok(), "{:?}", res.err());
        let natspec = res.unwrap();
        assert_eq!(natspec, NatSpec::default());

        let comment = "/** */";
        let res = parse_natspec_comment(comment);
        assert!(res.is_ok(), "{:?}", res.err());
        let natspec = res.unwrap();
        assert_eq!(natspec, NatSpec::default());

        let comment = "/***/";
        let res = parse_natspec_comment(comment);
        assert!(res.is_ok(), "{:?}", res.err());
        let natspec = res.unwrap();
        assert_eq!(natspec, NatSpec::default());
    }

    // `/***` followed by content is not a NatSpec comment.
    #[test]
    fn test_multiline_invalid_delimiter() {
        let comment = "/*** @notice Some text\n    ** */";
        let res = parse_natspec_comment(comment);
        assert!(res.is_err(), "Expected error for input: {}", comment);
    }

    // A leading word that matches a return identifier becomes the return name,
    // and populating a second time changes nothing.
    #[test]
    fn test_populate_returns_logic() {
        let mut item = NatSpecItem {
            kind: NatSpecKind::Return { name: None },
            comment: "value The value returned".to_string(),
        };
        let identifiers = vec![
            Identifier {
                name: Some("value".to_string()),
                span: TextRange::default(),
            },
            Identifier {
                name: Some("success".to_string()),
                span: TextRange::default(),
            },
        ];
        item.populate_return(&identifiers);
        assert_eq!(
            item.kind,
            NatSpecKind::Return {
                name: Some("value".to_string())
            }
        );
        assert_eq!(item.comment, "The value returned".to_string());

        let mut natspec = NatSpec { items: vec![item] };
        natspec = natspec.populate_returns(&identifiers);
        assert_eq!(
            natspec.items[0].kind,
            NatSpecKind::Return {
                name: Some("value".to_string())
            }
        );
        assert_eq!(natspec.items[0].comment, "The value returned".to_string());
    }

    // A leading word that matches no identifier leaves the item untouched.
    #[test]
    fn test_populate_returns_no_match() {
        let mut item = NatSpecItem {
            kind: NatSpecKind::Return { name: None },
            comment: "Something else".to_string(),
        };
        let identifiers = vec![Identifier {
            name: Some("value".to_string()),
            span: TextRange::default(),
        }];
        item.populate_return(&identifiers);
        assert_eq!(item.kind, NatSpecKind::Return { name: None });
        assert_eq!(item.comment, "Something else".to_string());
    }
}