Skip to main content

big_code_analysis/
comment_rm.rs

1// Per-language metric and AST modules deliberately consume the macro-
2// generated tree-sitter token enums via `use crate::*` and `use Foo::*`
3// inside match expressions — explicit imports would list dozens of
4// variants per arm and obscure the per-language token sets that are the
5// point of these files. Allowed at the module level rather than per
6// function so the per-language impl blocks stay readable.
7#![allow(clippy::enum_glob_use, clippy::if_not_else, clippy::wildcard_imports)]
8
9use std::io::{self, Write};
10use std::path::PathBuf;
11
12use crate::checker::Checker;
13
14use crate::tools::*;
15use crate::traits::*;
16
/// Pre-filled table of 8192 newline bytes (`b'\n'`).
///
/// `remove_from_code` copies a prefix of this table to pad a removed span
/// with newlines. Despite the name, the bytes are line feeds, not carriage
/// returns.
const CR: [u8; 8192] = [b'\n'; 8192];
18
19// Hidden from rustdoc because the signature exposes `ParserTrait`,
20// which is `#[doc(hidden)]` per issue #256. The CLI's `CommentRm`
21// callback remains the documented surface.
22#[doc(hidden)]
23/// Removes comments from a code.
24pub fn rm_comments<T: ParserTrait>(parser: &T) -> Option<Vec<u8>> {
25    let node = parser.get_root();
26    let mut stack = Vec::new();
27    let mut cursor = node.cursor();
28    let mut spans = Vec::new();
29
30    stack.push(node);
31
32    while let Some(node) = stack.pop() {
33        if T::Checker::is_comment(&node) && !T::Checker::is_useful_comment(&node, parser.get_code())
34        {
35            let lines = node.end_row() - node.start_row();
36            spans.push((node.start_byte(), node.end_byte(), lines));
37        } else {
38            cursor.reset(&node);
39            if cursor.goto_first_child() {
40                loop {
41                    stack.push(cursor.node());
42                    if !cursor.goto_next_sibling() {
43                        break;
44                    }
45                }
46            }
47        }
48    }
49    if !spans.is_empty() {
50        Some(remove_from_code(parser.get_code(), spans))
51    } else {
52        None
53    }
54}
55
56fn remove_from_code(code: &[u8], mut spans: Vec<(usize, usize, usize)>) -> Vec<u8> {
57    let mut new_code = Vec::with_capacity(code.len());
58    let mut code_start = 0;
59    for (start, end, lines) in spans.drain(..).rev() {
60        new_code.extend(&code[code_start..start]);
61        if lines != 0 {
62            if lines <= CR.len() {
63                new_code.extend(&CR[..lines]);
64            } else {
65                new_code.resize_with(new_code.len() + lines, || b'\n');
66            }
67        }
68        code_start = end;
69    }
70    if code_start < code.len() {
71        new_code.extend(&code[code_start..]);
72    }
73    new_code
74}
75
/// Options consumed by the `CommentRm` callback.
#[derive(Debug)]
pub struct CommentRmCfg {
    /// When `true`, the comment-free code is written to `path`;
    /// when `false`, it is written to standard output instead.
    pub in_place: bool,
    /// Destination file for the comment-free code; only read when
    /// `in_place` is `true`.
    pub path: PathBuf,
}
84
/// Marker type whose `Callback` implementation performs comment removal.
///
/// It holds no data: the single private unit field only prevents the type
/// from being built with a struct literal outside this module. The type is
/// used purely as a tag, since `Callback::call` takes no `self`.
pub struct CommentRm {
    _guard: (),
}
89
90impl Callback for CommentRm {
91    type Res = std::io::Result<()>;
92    type Cfg = CommentRmCfg;
93
94    fn call<T: ParserTrait>(cfg: Self::Cfg, parser: &T) -> Self::Res {
95        if let Some(new_source) = rm_comments(parser) {
96            if cfg.in_place {
97                write_file(&cfg.path, &new_source)?;
98            } else if let Ok(new_source) = std::str::from_utf8(&new_source) {
99                println!("{new_source}");
100            } else {
101                io::stdout().write_all(&new_source)?;
102            }
103        }
104        Ok(())
105    }
106}
107
// NOTE(review): none of the float/cast lints below appear to fire in this
// module; the list looks like boilerplate shared across test modules —
// confirm before trimming it.
#[cfg(test)]
#[allow(
    clippy::float_cmp,
    clippy::cast_precision_loss,
    clippy::cast_possible_truncation,
    clippy::cast_sign_loss,
    clippy::similar_names,
    clippy::doc_markdown,
    clippy::needless_raw_string_hashes,
    clippy::too_many_lines
)]
mod tests {
    use std::path::PathBuf;

    use crate::{CcommentParser, ParserTrait};

    use super::rm_comments;

    // C snippet mixing single-line and multi-line comments with code.
    const SOURCE_CODE: &str = "/* Remove this code block */\n\
                               int a = 42; // Remove this comment\n\
                               // Remove this comment\n\
                               int b = 42;\n\
                               /* Remove\n\
                                * this\n\
                                * comment\n\
                                */";

    // Expected output for `SOURCE_CODE` plus one trailing newline: the
    // comments are gone but every line break is still present.
    const SOURCE_CODE_NO_COMMENTS: &str = "\n\
                                           int a = 42; \n\
                                           \n\
                                           int b = 42;\n\
                                           \n\
                                           \n\
                                           \n\
                                           \n";

    /// Feeds the commented snippet through `rm_comments` and compares the
    /// result byte for byte with the expected comment-free text.
    #[test]
    fn ccomment_remove_comments() {
        // The parser input is the snippet followed by a final newline.
        let source = format!("{}\n", SOURCE_CODE).into_bytes();
        let path = PathBuf::from("foo.c");
        let parser = CcommentParser::new(source, &path, None);

        let stripped = rm_comments(&parser).unwrap();

        assert_eq!(stripped.as_slice(), SOURCE_CODE_NO_COMMENTS.as_bytes());
    }
}
155}