cpp_linter/clang_tools/
clang_tidy.rs

//! This module holds functionality specific to running clang-tidy and parsing its
//! output.
3
4use std::{
5    env::{consts::OS, current_dir},
6    fs,
7    path::PathBuf,
8    process::Command,
9    sync::{Arc, Mutex, MutexGuard},
10};
11
12// non-std crates
13use anyhow::{Context, Result};
14use regex::Regex;
15use serde::Deserialize;
16
17// project-specific modules/crates
18use super::MakeSuggestions;
19use crate::{
20    cli::{ClangParams, LinesChangedOnly},
21    common_fs::{normalize_path, FileObj},
22};
23
24/// Used to deserialize a json compilation database's translation unit.
25///
26/// The only purpose this serves is to normalize relative paths for build systems that
27/// use/need relative paths (ie ninja).
28#[derive(Deserialize, Debug, Clone)]
29pub struct CompilationUnit {
30    /// The directory of the build environment
31    directory: String,
32
33    /// The file path of the translation unit.
34    ///
35    /// Sometimes, this is relative to the build [`CompilationUnit::directory`].
36    ///
37    /// This is typically the path that clang-tidy uses in its stdout (for a dry run).
38    /// So, having this information helps with matching clang-tidy's stdout with the
39    /// repository files.
40    file: String,
41}
42
/// A structure that represents a single notification parsed from clang-tidy's stdout.
#[derive(Debug, Clone)]
pub struct TidyNotification {
    /// The file's path and name (supposedly relative to the repository root folder).
    pub filename: String,

    /// The line number from which the notification originated.
    pub line: u32,

    /// The column offset on the line from which the notification originated.
    pub cols: u32,

    /// The severity (ie error/warning/note) of the [`TidyNotification::diagnostic`]
    /// that caused the notification.
    pub severity: String,

    /// A helpful message explaining why the notification exists.
    pub rationale: String,

    /// The diagnostic name as used when configuring clang-tidy.
    pub diagnostic: String,

    /// A code block that points directly to the origin of the notification.
    ///
    /// Sometimes, this code block doesn't exist. Sometimes, it contains suggested
    /// fixes/advice. This information is purely superfluous.
    pub suggestion: Vec<String>,

    /// The list of line numbers that had fixes applied via `clang-tidy --fix-errors`.
    pub fixed_lines: Vec<u32>,
}

impl TidyNotification {
    /// Renders [`TidyNotification::diagnostic`] as a markdown link to the
    /// clang-tidy documentation of the corresponding check.
    ///
    /// The plain diagnostic name (without a link) is returned when
    ///
    /// - the name starts with `clang-diagnostic`, or
    /// - the name has no `-` separating a check category from a check name, so
    ///   no documentation URL can be derived from it.
    ///
    /// Names starting with `clang-analyzer-` use `clang-analyzer` as the
    /// category. Any other name is split at its first `-` into category and
    /// check name.
    pub fn diagnostic_link(&self) -> String {
        if self.diagnostic.starts_with("clang-diagnostic") {
            return self.diagnostic.clone();
        }
        let parts = match self.diagnostic.strip_prefix("clang-analyzer-") {
            Some(name) => Some(("clang-analyzer", name)),
            None => self.diagnostic.split_once('-'),
        };
        match parts {
            Some((category, name)) => format!(
                "[{}](https://clang.llvm.org/extra/clang-tidy/checks/{category}/{name}.html)",
                self.diagnostic
            ),
            // Nothing to split on; fall back to the bare name instead of panicking.
            None => self.diagnostic.clone(),
        }
    }
}
94
95/// A struct to hold notification from clang-tidy about a single file
96#[derive(Debug, Clone)]
97pub struct TidyAdvice {
98    /// A list of notifications parsed from clang-tidy stdout.
99    pub notes: Vec<TidyNotification>,
100    pub patched: Option<Vec<u8>>,
101}
102
103impl MakeSuggestions for TidyAdvice {
104    fn get_suggestion_help(&self, start_line: u32, end_line: u32) -> String {
105        let mut diagnostics = vec![];
106        for note in &self.notes {
107            for fixed_line in &note.fixed_lines {
108                if (start_line..=end_line).contains(fixed_line) {
109                    diagnostics.push(format!(
110                        "- {} [{}]\n",
111                        note.rationale,
112                        note.diagnostic_link()
113                    ));
114                }
115            }
116        }
117        format!(
118            "### clang-tidy {}\n{}",
119            if diagnostics.is_empty() {
120                "suggestion"
121            } else {
122                "diagnostic(s)"
123            },
124            diagnostics.join("")
125        )
126    }
127
128    fn get_tool_name(&self) -> String {
129        "clang-tidy".to_string()
130    }
131}
132
/// The regex pattern that recognizes the header line of a clang-tidy note.
///
/// Capture groups, in order: file path, line number, column number, severity,
/// rationale, and the diagnostic name found in the trailing square brackets.
/// Anything after the diagnostic name inside those brackets (ie
/// `,-warnings-as-errors`) is matched but not captured.
const NOTE_HEADER: &str = r"^(.+):(\d+):(\d+):\s(\w+):(.*)\[([a-zA-Z\d\-\.]+),?[^\]]*\]$";
135
136/// Parses clang-tidy stdout.
137///
138/// Here it helps to have the JSON database deserialized for normalizing paths present
139/// in the notifications.
140fn parse_tidy_output(
141    tidy_stdout: &[u8],
142    database_json: &Option<Vec<CompilationUnit>>,
143) -> Result<TidyAdvice> {
144    let note_header = Regex::new(NOTE_HEADER).unwrap();
145    let fixed_note =
146        Regex::new(r"^.+:(\d+):\d+:\snote: FIX-IT applied suggested code changes$").unwrap();
147    let mut found_fix = false;
148    let mut notification = None;
149    let mut result = Vec::new();
150    let cur_dir = current_dir().unwrap();
151    for line in String::from_utf8(tidy_stdout.to_vec()).unwrap().lines() {
152        if let Some(captured) = note_header.captures(line) {
153            if let Some(note) = notification {
154                result.push(note);
155            }
156
157            // normalize the filename path and try to make it relative to the repo root
158            let mut filename = PathBuf::from(&captured[1]);
159            // if database was given try to use that first
160            if let Some(db_json) = &database_json {
161                let mut found_unit = false;
162                for unit in db_json {
163                    let unit_path =
164                        PathBuf::from_iter([unit.directory.as_str(), unit.file.as_str()]);
165                    if unit_path == filename {
166                        filename =
167                            normalize_path(&PathBuf::from_iter([&unit.directory, &unit.file]));
168                        found_unit = true;
169                        break;
170                    }
171                }
172                if !found_unit {
173                    // file was not a named unit in the database;
174                    // try to normalize path as if relative to working directory.
175                    // NOTE: This shouldn't happen with a properly formed JSON database
176                    filename = normalize_path(&PathBuf::from_iter([&cur_dir, &filename]));
177                }
178            } else {
179                // still need to normalize the relative path despite missing database info.
180                // let's assume the file is relative to current working directory.
181                filename = normalize_path(&PathBuf::from_iter([&cur_dir, &filename]));
182            }
183            assert!(filename.is_absolute());
184            if filename.is_absolute() && filename.starts_with(&cur_dir) {
185                // if this filename can't be made into a relative path, then it is
186                // likely not a member of the project's sources (ie /usr/include/stdio.h)
187                filename = filename
188                    .strip_prefix(&cur_dir)
189                    // we already checked above that filename.starts_with(current_directory)
190                    .unwrap()
191                    .to_path_buf();
192            }
193
194            notification = Some(TidyNotification {
195                filename: filename.to_string_lossy().to_string().replace('\\', "/"),
196                line: captured[2].parse()?,
197                cols: captured[3].parse()?,
198                severity: String::from(&captured[4]),
199                rationale: String::from(&captured[5]).trim().to_string(),
200                diagnostic: String::from(&captured[6]),
201                suggestion: Vec::new(),
202                fixed_lines: Vec::new(),
203            });
204            // begin capturing subsequent lines as suggestions
205            found_fix = false;
206        } else if let Some(capture) = fixed_note.captures(line) {
207            let fixed_line = capture[1].parse()?;
208            if let Some(note) = &mut notification {
209                if !note.fixed_lines.contains(&fixed_line) {
210                    note.fixed_lines.push(fixed_line);
211                }
212            }
213            // Suspend capturing subsequent lines as suggestions until
214            // a new notification is constructed. If we found a note about applied fixes,
215            // then the lines of suggestions for that notification have already been parsed.
216            found_fix = true;
217        } else if !found_fix {
218            if let Some(note) = &mut notification {
219                // append lines of code that are part of
220                // the previous line's notification
221                note.suggestion.push(line.to_string());
222            }
223        }
224    }
225    if let Some(note) = notification {
226        result.push(note);
227    }
228    Ok(TidyAdvice {
229        notes: result,
230        patched: None,
231    })
232}
233
234/// Get a total count of clang-tidy advice from the given list of [FileObj]s.
235pub fn tally_tidy_advice(files: &[Arc<Mutex<FileObj>>]) -> u64 {
236    let mut total = 0;
237    for file in files {
238        let file = file.lock().unwrap();
239        if let Some(advice) = &file.tidy_advice {
240            for tidy_note in &advice.notes {
241                let file_path = PathBuf::from(&tidy_note.filename);
242                if file_path == file.name {
243                    total += 1;
244                }
245            }
246        }
247    }
248    total
249}
250
251/// Run clang-tidy, then parse and return it's output.
252pub fn run_clang_tidy(
253    file: &mut MutexGuard<FileObj>,
254    clang_params: &ClangParams,
255) -> Result<Vec<(log::Level, std::string::String)>> {
256    let mut cmd = Command::new(clang_params.clang_tidy_command.as_ref().unwrap());
257    let mut logs = vec![];
258    if !clang_params.tidy_checks.is_empty() {
259        cmd.args(["-checks", &clang_params.tidy_checks]);
260    }
261    if let Some(db) = &clang_params.database {
262        cmd.args(["-p", &db.to_string_lossy()]);
263    }
264    for arg in &clang_params.extra_args {
265        cmd.args(["--extra-arg", format!("\"{}\"", arg).as_str()]);
266    }
267    let file_name = file.name.to_string_lossy().to_string();
268    if clang_params.lines_changed_only != LinesChangedOnly::Off {
269        let ranges = file.get_ranges(&clang_params.lines_changed_only);
270        if !ranges.is_empty() {
271            let filter = format!(
272                "[{{\"name\":{:?},\"lines\":{:?}}}]",
273                &file_name.replace('/', if OS == "windows" { "\\" } else { "/" }),
274                ranges
275                    .iter()
276                    .map(|r| [r.start(), r.end()])
277                    .collect::<Vec<_>>()
278            );
279            cmd.args(["--line-filter", filter.as_str()]);
280        }
281    }
282    let original_content = if !clang_params.tidy_review {
283        None
284    } else {
285        cmd.arg("--fix-errors");
286        Some(fs::read_to_string(&file.name).with_context(|| {
287            format!(
288                "Failed to cache file's original content before applying clang-tidy changes: {}",
289                file_name.clone()
290            )
291        })?)
292    };
293    if !clang_params.style.is_empty() {
294        cmd.args(["--format-style", clang_params.style.as_str()]);
295    }
296    cmd.arg(file.name.to_string_lossy().as_ref());
297    logs.push((
298        log::Level::Info,
299        format!(
300            "Running \"{} {}\"",
301            cmd.get_program().to_string_lossy(),
302            cmd.get_args()
303                .map(|x| x.to_string_lossy())
304                .collect::<Vec<_>>()
305                .join(" ")
306        ),
307    ));
308    let output = cmd.output().unwrap();
309    logs.push((
310        log::Level::Debug,
311        format!(
312            "Output from clang-tidy:\n{}",
313            String::from_utf8_lossy(&output.stdout)
314        ),
315    ));
316    if !output.stderr.is_empty() {
317        logs.push((
318            log::Level::Debug,
319            format!(
320                "clang-tidy made the following summary:\n{}",
321                String::from_utf8_lossy(&output.stderr)
322            ),
323        ));
324    }
325    file.tidy_advice = Some(parse_tidy_output(
326        &output.stdout,
327        &clang_params.database_json,
328    )?);
329    if clang_params.tidy_review {
330        if let Some(tidy_advice) = &mut file.tidy_advice {
331            // cache file changes in a buffer and restore the original contents for further analysis
332            tidy_advice.patched =
333                Some(fs::read(&file_name).with_context(|| {
334                    format!("Failed to read changes from clang-tidy: {file_name}")
335                })?);
336        }
337        // original_content is guaranteed to be Some() value at this point
338        fs::write(&file_name, original_content.unwrap())
339            .with_context(|| format!("Failed to restore file's original content: {file_name}"))?;
340    }
341    Ok(logs)
342}
343
#[cfg(test)]
mod test {
    use std::{
        env,
        path::PathBuf,
        str::FromStr,
        sync::{Arc, Mutex},
    };

    use regex::Regex;

    use crate::{
        clang_tools::ClangTool,
        cli::{ClangParams, LinesChangedOnly, RequestedVersion},
        common_fs::FileObj,
    };

    use super::{run_clang_tidy, TidyNotification, NOTE_HEADER};

    /// Creates a notification for a fixed location in `some_src.cpp` with the
    /// given severity, rationale, and diagnostic name.
    fn make_note(severity: &str, rationale: &str, diagnostic: &str) -> TidyNotification {
        TidyNotification {
            filename: "some_src.cpp".to_string(),
            line: 1504,
            cols: 9,
            severity: severity.to_string(),
            rationale: rationale.to_string(),
            diagnostic: diagnostic.to_string(),
            suggestion: Vec::new(),
            fixed_lines: Vec::new(),
        }
    }

    /// A `clang-diagnostic-*` name is returned without a link.
    #[test]
    fn clang_diagnostic_link() {
        let note = make_note("error", "file not found", "clang-diagnostic-error");
        assert_eq!(note.diagnostic_link(), note.diagnostic);
    }

    /// A `clang-analyzer-*` name links into the `clang-analyzer` category.
    #[test]
    fn clang_analyzer_link() {
        let note = make_note(
            "warning",
            "Dereference of null pointer (loaded from variable 'pipe_num')",
            "clang-analyzer-core.NullDereference",
        );
        let expected = format!(
            "[{}](https://clang.llvm.org/extra/clang-tidy/checks/{}/{}.html)",
            note.diagnostic, "clang-analyzer", "core.NullDereference",
        );
        assert_eq!(note.diagnostic_link(), expected);
    }

    // ***************** test for regex parsing of clang-tidy stdout

    /// The captured groups can be stitched back together into the whole header.
    #[test]
    fn test_capture() {
        let header = "tests/demo/demo.hpp:11:11: \
        warning: use a trailing return type for this function \
        [modernize-use-trailing-return-type,-warnings-as-errors]";
        let pattern = Regex::new(NOTE_HEADER).unwrap();
        let groups = pattern.captures(header).unwrap();
        let rebuilt = format!(
            "{}:{}:{}: {}:{}[{},-warnings-as-errors]",
            &groups[1], &groups[2], &groups[3], &groups[4], &groups[5], &groups[6]
        );
        assert_eq!(&groups[0], rebuilt.as_str())
    }

    /// Every extra argument shows up (quoted) in the logged clang-tidy command.
    #[test]
    fn use_extra_args() {
        let version = env::var("CLANG_VERSION").unwrap_or_default();
        let requested = RequestedVersion::from_str(version.as_str()).unwrap();
        let exe_path = ClangTool::ClangTidy.get_exe_path(&requested).unwrap();

        let extra_args = vec!["-std=c++17".to_string(), "-Wall".to_string()];
        let clang_params = ClangParams {
            style: String::new(),
            tidy_checks: String::new(), // use .clang-tidy config file
            lines_changed_only: LinesChangedOnly::Off,
            database: None,
            extra_args: extra_args.clone(), // <---- the reason for this test
            database_json: None,
            format_filter: None,
            tidy_filter: None,
            tidy_review: false,
            format_review: false,
            clang_tidy_command: Some(exe_path),
            clang_format_command: None,
        };

        let shared_file = Arc::new(Mutex::new(FileObj::new(PathBuf::from(
            "tests/demo/demo.cpp",
        ))));
        let mut file_lock = shared_file.lock().unwrap();
        let command_log = run_clang_tidy(&mut file_lock, &clang_params)
            .unwrap()
            .into_iter()
            .map(|(_lvl, msg)| msg)
            .find(|msg| msg.contains("Running "))
            .expect("expected a log message about invoked clang-tidy command");
        let args: Vec<&str> = command_log.split(' ').collect();
        for arg in &extra_args {
            let quoted = format!("\"{arg}\"");
            assert!(args.contains(&quoted.as_str()));
        }
    }
}