prost_build/
ast.rs

1use prost_types::source_code_info::Location;
2#[cfg(feature = "cleanup-markdown")]
3use pulldown_cmark::{CodeBlockKind, Event, Options, Parser, Tag};
4use regex::Regex;
5use std::sync::LazyLock;
6
/// The comments attached to a Protobuf item.
///
/// Every comment is stored as a list of lines.
#[derive(Clone, Debug, Default)]
pub struct Comments {
    /// Detached comment blocks that precede the item; each inner vector holds
    /// the lines of one block.
    pub leading_detached: Vec<Vec<String>>,

    /// Lines of the leading comment.
    pub leading: Vec<String>,

    /// Lines of the trailing comment.
    pub trailing: Vec<String>,
}
19
20impl Comments {
21    pub(crate) fn from_location(location: &Location) -> Comments {
22        let leading_detached = location
23            .leading_detached_comments
24            .iter()
25            .map(get_lines)
26            .collect();
27        let leading = location
28            .leading_comments
29            .as_ref()
30            .map_or(Vec::new(), get_lines);
31        let trailing = location
32            .trailing_comments
33            .as_ref()
34            .map_or(Vec::new(), get_lines);
35        Comments {
36            leading_detached,
37            leading,
38            trailing,
39        }
40    }
41
42    /// Appends the comments to a buffer with indentation.
43    ///
44    /// Each level of indentation corresponds to four space (' ') characters.
45    pub fn append_with_indent(&self, indent_level: u8, buf: &mut String) {
46        // Append blocks of detached comments.
47        for detached_block in &self.leading_detached {
48            for line in detached_block {
49                for _ in 0..indent_level {
50                    buf.push_str("    ");
51                }
52                buf.push_str("//");
53                buf.push_str(&Self::sanitize_line(line));
54                buf.push('\n');
55            }
56            buf.push('\n');
57        }
58
59        // Append leading comments.
60        for line in &self.leading {
61            for _ in 0..indent_level {
62                buf.push_str("    ");
63            }
64            buf.push_str("///");
65            buf.push_str(&Self::sanitize_line(line));
66            buf.push('\n');
67        }
68
69        // Append an empty comment line if there are leading and trailing comments.
70        if !self.leading.is_empty() && !self.trailing.is_empty() {
71            for _ in 0..indent_level {
72                buf.push_str("    ");
73            }
74            buf.push_str("///\n");
75        }
76
77        // Append trailing comments.
78        for line in &self.trailing {
79            for _ in 0..indent_level {
80                buf.push_str("    ");
81            }
82            buf.push_str("///");
83            buf.push_str(&Self::sanitize_line(line));
84            buf.push('\n');
85        }
86    }
87
88    /// Checks whether a RustDoc line should be indented.
89    ///
90    /// Lines should be indented if:
91    /// - they are non-empty, AND
92    ///   - they don't already start with a space
93    ///     OR
94    ///   - they start with several spaces.
95    ///
96    /// The last condition can happen in the case of multi-line Markdown lists
97    /// such as:
98    ///
99    /// - this is a list
100    ///   where some elements spans multiple lines
101    /// - but not all elements
102    fn should_indent(sanitized_line: &str) -> bool {
103        let mut chars = sanitized_line.chars();
104        chars
105            .next()
106            .is_some_and(|c| c != ' ' || chars.next() == Some(' '))
107    }
108
109    /// Sanitizes the line for rustdoc by performing the following operations:
110    ///     - escape urls as <http://foo.com>
111    ///     - escape `[` & `]` if not already escaped and not followed by a parenthesis or bracket
112    fn sanitize_line(line: &str) -> String {
113        static RULE_URL: LazyLock<Regex> =
114            LazyLock::new(|| Regex::new(r"https?://[^\s)]+").unwrap());
115        static RULE_BRACKETS: LazyLock<Regex> =
116            LazyLock::new(|| Regex::new(r"(^|[^\]\\])\[(([^\]]*[^\\])?)\]([^(\[]|$)").unwrap());
117
118        let mut s = RULE_URL.replace_all(line, r"<$0>").to_string();
119        s = RULE_BRACKETS.replace_all(&s, r"$1\[$2\]$4").to_string();
120        if Self::should_indent(&s) {
121            s.insert(0, ' ');
122        }
123        s
124    }
125}
126
127/// A service descriptor.
128#[derive(Debug, Clone)]
129pub struct Service {
130    /// The service name in Rust style.
131    pub name: String,
132    /// The service name as it appears in the .proto file.
133    pub proto_name: String,
134    /// The package name as it appears in the .proto file.
135    pub package: String,
136    /// The service comments.
137    pub comments: Comments,
138    /// The service methods.
139    pub methods: Vec<Method>,
140    /// The service options.
141    pub options: prost_types::ServiceOptions,
142}
143
144/// A service method descriptor.
145#[derive(Debug, Clone)]
146pub struct Method {
147    /// The name of the method in Rust style.
148    pub name: String,
149    /// The name of the method as it appears in the .proto file.
150    pub proto_name: String,
151    /// The method comments.
152    pub comments: Comments,
153    /// The input Rust type.
154    pub input_type: String,
155    /// The output Rust type.
156    pub output_type: String,
157    /// The input Protobuf type.
158    pub input_proto_type: String,
159    /// The output Protobuf type.
160    pub output_proto_type: String,
161    /// The method options.
162    pub options: prost_types::MethodOptions,
163    /// Identifies if client streams multiple client messages.
164    pub client_streaming: bool,
165    /// Identifies if server streams multiple server messages.
166    pub server_streaming: bool,
167}
168
/// Splits a comment into owned lines without altering their content.
///
/// This variant is compiled when the `cleanup-markdown` feature is disabled.
#[cfg(not(feature = "cleanup-markdown"))]
fn get_lines<S>(comments: S) -> Vec<String>
where
    S: AsRef<str>,
{
    let text: &str = comments.as_ref();
    let mut lines = Vec::new();
    for line in text.lines() {
        lines.push(String::from(line));
    }
    lines
}
176
/// Normalizes the Markdown of a comment and splits the result into lines.
///
/// This variant is compiled when the `cleanup-markdown` feature is enabled.
/// The comment is parsed with every `pulldown_cmark` option except smart
/// punctuation, its code blocks are relabelled, and the events are rendered
/// back to Markdown. When rendering fails, the lines of the original comment
/// are returned unchanged.
#[cfg(feature = "cleanup-markdown")]
fn get_lines<S>(comments: S) -> Vec<String>
where
    S: AsRef<str>,
{
    /// Turns every code block into a fenced one with a rewritten label:
    /// `rust` becomes `compile_fail`, any other fence label gets a `text,`
    /// prefix, and indented blocks are labelled `text`.
    fn relabel(kind: CodeBlockKind) -> CodeBlockKind {
        match kind {
            CodeBlockKind::Indented => CodeBlockKind::Fenced("text".into()),
            CodeBlockKind::Fenced(label) => {
                if label.as_ref() == "rust" {
                    CodeBlockKind::Fenced("compile_fail".into())
                } else {
                    CodeBlockKind::Fenced(format!("text,{label}").into())
                }
            }
        }
    }

    let source = comments.as_ref();

    let parser_options = Options::all() - Options::ENABLE_SMART_PUNCTUATION;
    let events = Parser::new_ext(source, parser_options).map(|event| match event {
        Event::Start(Tag::CodeBlock(kind)) => Event::Start(Tag::CodeBlock(relabel(kind))),
        other => other,
    });

    let render_options = pulldown_cmark_to_cmark::Options {
        code_block_token_count: 3,
        ..Default::default()
    };
    let mut rendered = String::with_capacity(source.len() + 256);

    match pulldown_cmark_to_cmark::cmark_with_options(events, &mut rendered, render_options) {
        Ok(_) => rendered.lines().map(str::to_owned).collect(),
        // Rendering failed: fall back to the untouched comment.
        Err(_) => source.lines().map(str::to_owned).collect(),
    }
}
218
#[cfg(test)]
mod tests {
    use super::*;

    /// Renders `line` as the only trailing comment at indentation level zero.
    fn render_trailing(line: &str) -> String {
        let comments = Comments {
            leading_detached: vec![],
            leading: vec![],
            trailing: vec![line.to_owned()],
        };

        let mut rendered = String::new();
        comments.append_with_indent(0, &mut rendered);
        rendered
    }

    /// Checks every `(name, input, expected)` case against `render_trailing`.
    fn assert_trailing_cases(cases: &[(&str, &str, &str)]) {
        for &(name, input, expected) in cases {
            assert_eq!(expected, render_trailing(input), "failed {name}");
        }
    }

    #[test]
    fn test_comment_append_with_indent_leaves_prespaced_lines() {
        assert_trailing_cases(&[
            (
                "existing_space",
                " A line with a single leading space.",
                "/// A line with a single leading space.\n",
            ),
            (
                "non_existing_space",
                "A line without a single leading space.",
                "/// A line without a single leading space.\n",
            ),
            ("empty", "", "///\n"),
            (
                "multiple_leading_spaces",
                "  a line with several leading spaces, such as in a markdown list",
                "///   a line with several leading spaces, such as in a markdown list\n",
            ),
        ]);
    }

    #[test]
    fn test_comment_append_with_indent_sanitizes_comment_doc_url() {
        assert_trailing_cases(&[
            // Plain `http` scheme; this case previously duplicated the `https` input.
            (
                "valid_http",
                "See http://www.rust-lang.org/",
                "/// See <http://www.rust-lang.org/>\n",
            ),
            (
                "valid_https",
                "See https://www.rust-lang.org/",
                "/// See <https://www.rust-lang.org/>\n",
            ),
            (
                "valid_https_parenthesis",
                "See (https://www.rust-lang.org/)",
                "/// See (<https://www.rust-lang.org/>)\n",
            ),
            ("invalid", "See note://abc", "/// See note://abc\n"),
        ]);
    }

    #[test]
    fn test_comment_append_with_indent_sanitizes_square_brackets() {
        assert_trailing_cases(&[
            ("valid_brackets", "foo [bar] baz", "/// foo \\[bar\\] baz\n"),
            ("invalid_start_bracket", "foo [= baz", "/// foo [= baz\n"),
            ("invalid_end_bracket", "foo =] baz", "/// foo =] baz\n"),
            ("invalid_bracket_combination", "[0, 9)", "/// [0, 9)\n"),
            (
                "valid_brackets_parenthesis",
                "foo [bar](bar) baz",
                "/// foo [bar](bar) baz\n",
            ),
            ("valid_brackets_end", "foo [bar]", "/// foo \\[bar\\]\n"),
            (
                "valid_brackets_no_parenthesis",
                "foo [bar]baz",
                "/// foo \\[bar\\]baz\n",
            ),
            ("valid_empty_brackets", "foo []", "/// foo \\[\\]\n"),
            (
                "valid_empty_brackets_parenthesis",
                "foo []()",
                "/// foo []()\n",
            ),
            (
                "valid_brackets_brackets",
                "foo [bar][bar] baz",
                "/// foo [bar][bar] baz\n",
            ),
            (
                "valid_brackets_brackets_end",
                "foo [bar][baz]",
                "/// foo [bar][baz]\n",
            ),
            ("valid_brackets_brackets_all", "[bar][baz]", "/// [bar][baz]\n"),
            (
                "escaped_brackets",
                "\\[bar\\]\\[baz\\]",
                "/// \\[bar\\]\\[baz\\]\n",
            ),
            (
                "escaped_empty_brackets",
                "\\[\\]\\[\\]",
                "/// \\[\\]\\[\\]\n",
            ),
        ]);
    }

    #[test]
    fn test_codeblocks() {
        struct TestCase {
            name: &'static str,
            input: &'static str,
            // Only read when the `cleanup-markdown` feature is enabled.
            #[allow(unused)]
            cleanedup_expected: Vec<&'static str>,
        }

        let tests = vec![
            TestCase {
                name: "unlabelled_block",
                input: "    thingy\n",
                cleanedup_expected: vec!["", "```text", "thingy", "```"],
            },
            TestCase {
                name: "rust_block",
                input: "```rust\nfoo.bar()\n```\n",
                cleanedup_expected: vec!["", "```compile_fail", "foo.bar()", "```"],
            },
            TestCase {
                name: "js_block",
                input: "```javascript\nfoo.bar()\n```\n",
                cleanedup_expected: vec!["", "```text,javascript", "foo.bar()", "```"],
            },
        ];

        for t in tests {
            let loc = Location {
                path: vec![],
                span: vec![],
                leading_comments: Some(t.input.into()),
                trailing_comments: None,
                leading_detached_comments: vec![],
            };
            let comments = Comments::from_location(&loc);
            // With the feature on, the Markdown is normalized; without it, the
            // comment lines pass through untouched.
            #[cfg(feature = "cleanup-markdown")]
            let expected = t.cleanedup_expected;
            #[cfg(not(feature = "cleanup-markdown"))]
            let expected: Vec<&str> = t.input.lines().collect();
            assert_eq!(expected, comments.leading, "failed {}", t.name);
        }
    }
}