cpp_linter/clang_tools/
mod.rs

//! This module holds the functionality related to running clang-format and/or
//! clang-tidy.
3
4use std::{
5    env::current_dir,
6    fs,
7    path::{Path, PathBuf},
8    process::Command,
9    sync::{Arc, Mutex},
10};
11
12use anyhow::{anyhow, Context, Result};
13use git2::{DiffOptions, Patch};
14// non-std crates
15use lenient_semver;
16use regex::Regex;
17use semver::Version;
18use tokio::task::JoinSet;
19use which::{which, which_in};
20
21// project-specific modules/crates
22use super::common_fs::FileObj;
23use crate::{
24    cli::ClangParams,
25    rest_api::{RestApiClient, COMMENT_MARKER, USER_OUTREACH},
26};
27pub mod clang_format;
28use clang_format::run_clang_format;
29pub mod clang_tidy;
30use clang_tidy::{run_clang_tidy, CompilationUnit};
31
32/// Fetch the path to a clang tool by `name` (ie `"clang-tidy"` or `"clang-format"`) and
33/// `version`.
34///
35/// The specified `version` can be either
36///
37/// - a full or partial semantic version specification
38/// - a path to a directory containing the executable binary `name`d
39///
40/// If the executable is not found using the specified `version`, then the tool is
41/// sought only by it's `name`.
42///
43/// The only reason this function would return an error is if the specified tool is not
44/// installed or present on the system (nor in the `$PATH` environment variable).
45pub fn get_clang_tool_exe(name: &str, version: &str) -> Result<PathBuf> {
46    if version.is_empty() {
47        // The default CLI value is an empty string.
48        // Thus, we should use whatever is installed and added to $PATH.
49        if let Ok(cmd) = which(name) {
50            return Ok(cmd);
51        } else {
52            return Err(anyhow!("Could not find clang tool by name"));
53        }
54    }
55    if let Ok(semver) = lenient_semver::parse_into::<Version>(version) {
56        // `version` specified has at least a major version number
57        if let Ok(cmd) = which(format!("{}-{}", name, semver.major)) {
58            Ok(cmd)
59        } else if let Ok(cmd) = which(name) {
60            // USERS SHOULD MAKE SURE THE PROPER VERSION IS INSTALLED BEFORE USING CPP-LINTER!!!
61            // This block essentially ignores the version specified as a fail-safe.
62            //
63            // On Windows, the version's major number is typically not appended to the name of
64            // the executable (or symlink for executable), so this is useful in that scenario.
65            // On Unix systems, this block is not likely reached. Typically, installing clang
66            // will produce a symlink to the executable with the major version appended to the
67            // name.
68            return Ok(cmd);
69        } else {
70            return Err(anyhow!("Could not find clang tool by name and version"));
71        }
72    } else {
73        // `version` specified is not a semantic version; treat as path/to/bin
74        if let Ok(exe_path) = which_in(name, Some(version), current_dir().unwrap()) {
75            Ok(exe_path)
76        } else {
77            Err(anyhow!("Could not find clang tool by path"))
78        }
79    }
80}
81
82/// This creates a task to run clang-tidy and clang-format on a single file.
83///
84/// Returns a Future that infallibly resolves to a 2-tuple that contains
85///
86/// 1. The file's path.
87/// 2. A collections of cached logs. A [`Vec`] of tuples that hold
88///    - log level
89///    - messages
90fn analyze_single_file(
91    file: Arc<Mutex<FileObj>>,
92    clang_params: Arc<ClangParams>,
93) -> Result<(PathBuf, Vec<(log::Level, String)>)> {
94    let mut file = file
95        .lock()
96        .map_err(|_| anyhow!("Failed to lock file mutex"))?;
97    let mut logs = vec![];
98    if clang_params.clang_tidy_command.is_some() {
99        if clang_params
100            .tidy_filter
101            .as_ref()
102            .is_some_and(|f| f.is_source_or_ignored(file.name.as_path()))
103            || clang_params.tidy_filter.is_none()
104        {
105            let tidy_result = run_clang_tidy(&mut file, &clang_params)?;
106            logs.extend(tidy_result);
107        } else {
108            logs.push((
109                log::Level::Info,
110                format!(
111                    "{} not scanned by clang-tidy due to `--ignore-tidy`",
112                    file.name.as_os_str().to_string_lossy()
113                ),
114            ));
115        }
116    }
117    if clang_params.clang_format_command.is_some() {
118        if clang_params
119            .format_filter
120            .as_ref()
121            .is_some_and(|f| f.is_source_or_ignored(file.name.as_path()))
122            || clang_params.format_filter.is_none()
123        {
124            let format_result = run_clang_format(&mut file, &clang_params)?;
125            logs.extend(format_result);
126        } else {
127            logs.push((
128                log::Level::Info,
129                format!(
130                    "{} not scanned by clang-format due to `--ignore-format`",
131                    file.name.as_os_str().to_string_lossy()
132                ),
133            ));
134        }
135    }
136    Ok((file.name.clone(), logs))
137}
138
/// A struct to contain the version numbers of the clang-tools used.
///
/// A field is `None` until the corresponding tool's version has been captured.
#[derive(Debug, Default, Clone, PartialEq, Eq)]
pub struct ClangVersions {
    /// The clang-format version used.
    pub format_version: Option<String>,

    /// The clang-tidy version used.
    pub tidy_version: Option<String>,
}
148
149/// Run `clang-tool --version`, then extract and return the version number.
150fn capture_clang_version(clang_tool: &PathBuf) -> Result<String> {
151    let output = Command::new(clang_tool).arg("--version").output()?;
152    let stdout = String::from_utf8_lossy(&output.stdout);
153    let version_pattern = Regex::new(r"(?i)version\s*([\d.]+)").unwrap();
154    let captures = version_pattern.captures(&stdout).ok_or(anyhow!(
155        "Failed to find version number in `{} --version` output",
156        clang_tool.to_string_lossy()
157    ))?;
158    Ok(captures.get(1).unwrap().as_str().to_string())
159}
160
161/// Runs clang-tidy and/or clang-format and returns the parsed output from each.
162///
163/// If `tidy_checks` is `"-*"` then clang-tidy is not executed.
164/// If `style` is a blank string (`""`), then clang-format is not executed.
165pub async fn capture_clang_tools_output(
166    files: &mut Vec<Arc<Mutex<FileObj>>>,
167    version: &str,
168    clang_params: &mut ClangParams,
169    rest_api_client: &impl RestApiClient,
170) -> Result<ClangVersions> {
171    let mut clang_versions = ClangVersions::default();
172    // find the executable paths for clang-tidy and/or clang-format and show version
173    // info as debugging output.
174    if clang_params.tidy_checks != "-*" {
175        let exe_path = get_clang_tool_exe("clang-tidy", version)?;
176        let version_found = capture_clang_version(&exe_path)?;
177        log::debug!(
178            "{} --version: v{version_found}",
179            &exe_path.to_string_lossy()
180        );
181        clang_versions.tidy_version = Some(version_found);
182        clang_params.clang_tidy_command = Some(exe_path);
183    }
184    if !clang_params.style.is_empty() {
185        let exe_path = get_clang_tool_exe("clang-format", version)?;
186        let version_found = capture_clang_version(&exe_path)?;
187        log::debug!(
188            "{} --version: v{version_found}",
189            &exe_path.to_string_lossy()
190        );
191        clang_versions.format_version = Some(version_found);
192        clang_params.clang_format_command = Some(exe_path);
193    }
194
195    // parse database (if provided) to match filenames when parsing clang-tidy's stdout
196    if let Some(db_path) = &clang_params.database {
197        if let Ok(db_str) = fs::read(db_path.join("compile_commands.json")) {
198            clang_params.database_json = Some(
199                // A compilation database should be UTF-8 encoded, but file paths are not; use lossy conversion.
200                serde_json::from_str::<Vec<CompilationUnit>>(&String::from_utf8_lossy(&db_str))
201                    .with_context(|| "Failed to parse compile_commands.json")?,
202            )
203        }
204    };
205
206    let mut executors = JoinSet::new();
207    // iterate over the discovered files and run the clang tools
208    for file in files {
209        let arc_params = Arc::new(clang_params.clone());
210        let arc_file = Arc::clone(file);
211        executors.spawn(async move { analyze_single_file(arc_file, arc_params) });
212    }
213
214    while let Some(output) = executors.join_next().await {
215        if let Ok(out) = output? {
216            let (file_name, logs) = out;
217            rest_api_client.start_log_group(format!("Analyzing {}", file_name.to_string_lossy()));
218            for (level, msg) in logs {
219                log::log!(level, "{}", msg);
220            }
221            rest_api_client.end_log_group();
222        }
223    }
224    Ok(clang_versions)
225}
226
/// A struct to describe a single suggestion in a pull_request review.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Suggestion {
    /// The file's line number in the diff that begins the suggestion.
    pub line_start: u32,
    /// The file's line number in the diff that ends the suggestion.
    pub line_end: u32,
    /// The actual suggestion.
    pub suggestion: String,
    /// The file that this suggestion pertains to.
    pub path: String,
}
238
239/// A struct to describe the Pull Request review suggestions.
240#[derive(Default)]
241pub struct ReviewComments {
242    /// The total count of suggestions from clang-tidy and clang-format.
243    ///
244    /// This differs from `comments.len()` because some suggestions may
245    /// not fit within the file's diff.
246    pub tool_total: [Option<u32>; 2],
247    /// A list of comment suggestions to be posted.
248    ///
249    /// These suggestions are guaranteed to fit in the file's diff.
250    pub comments: Vec<Suggestion>,
251    /// The complete patch of changes to all files scanned.
252    ///
253    /// This includes changes from both clang-tidy and clang-format
254    /// (assembled in that order).
255    pub full_patch: [String; 2],
256}
257
258impl ReviewComments {
259    pub fn summarize(&self, clang_versions: &ClangVersions) -> String {
260        let mut body = format!("{COMMENT_MARKER}## Cpp-linter Review\n");
261        for t in 0u8..=1 {
262            let mut total = 0;
263            let (tool_name, tool_version) = if t == 0 {
264                ("clang-format", clang_versions.format_version.as_ref())
265            } else {
266                ("clang-tidy", clang_versions.tidy_version.as_ref())
267            };
268
269            let tool_total = if let Some(total) = self.tool_total[t as usize] {
270                total
271            } else {
272                // review was not requested from this tool or the tool was not used at all
273                continue;
274            };
275
276            // If the tool's version is unknown, then we don't need to output this line.
277            // NOTE: If the tool was invoked at all, then the tool's version shall be known.
278            if let Some(ver_str) = tool_version {
279                body.push_str(format!("\n### Used {tool_name} v{ver_str}\n").as_str());
280            }
281            for comment in &self.comments {
282                if comment
283                    .suggestion
284                    .contains(format!("### {tool_name}").as_str())
285                {
286                    total += 1;
287                }
288            }
289
290            if total != tool_total {
291                body.push_str(
292                    format!(
293                        "\nOnly {total} out of {tool_total} {tool_name} concerns fit within this pull request's diff.\n",
294                    )
295                    .as_str(),
296                );
297            }
298            if !self.full_patch[t as usize].is_empty() {
299                body.push_str(
300                    format!(
301                        "\n<details><summary>Click here for the full {tool_name} patch</summary>\n\n```diff\n{}```\n\n</details>\n",
302                        self.full_patch[t as usize]
303                    ).as_str()
304                );
305            } else {
306                body.push_str(
307                    format!(
308                        "\nNo concerns reported by {}. Great job! :tada:\n",
309                        tool_name
310                    )
311                    .as_str(),
312                )
313            }
314        }
315        body.push_str(USER_OUTREACH);
316        body
317    }
318
319    pub fn is_comment_in_suggestions(&mut self, comment: &Suggestion) -> bool {
320        for s in &mut self.comments {
321            if s.path == comment.path
322                && s.line_end == comment.line_end
323                && s.line_start == comment.line_start
324            {
325                s.suggestion.push('\n');
326                s.suggestion.push_str(comment.suggestion.as_str());
327                return true;
328            }
329        }
330        false
331    }
332}
333
334pub fn make_patch<'buffer>(
335    path: &Path,
336    patched: &'buffer [u8],
337    original_content: &'buffer [u8],
338) -> Result<Patch<'buffer>> {
339    let mut diff_opts = &mut DiffOptions::new();
340    diff_opts = diff_opts.indent_heuristic(true);
341    diff_opts = diff_opts.context_lines(0);
342    let patch = Patch::from_buffers(
343        original_content,
344        Some(path),
345        patched,
346        Some(path),
347        Some(diff_opts),
348    )
349    .with_context(|| {
350        format!(
351            "Failed to create patch for file {}.",
352            path.to_string_lossy()
353        )
354    })?;
355    Ok(patch)
356}
357
358pub trait MakeSuggestions {
359    /// Create some user-facing helpful info about what the suggestion aims to resolve.
360    fn get_suggestion_help(&self, start_line: u32, end_line: u32) -> String;
361
362    /// Get the tool's name which generated the advice.
363    fn get_tool_name(&self) -> String;
364
365    /// Create a bunch of suggestions from a [`FileObj`]'s advice's generated `patched` buffer.
366    fn get_suggestions(
367        &self,
368        review_comments: &mut ReviewComments,
369        file_obj: &FileObj,
370        patch: &mut Patch,
371        summary_only: bool,
372    ) -> Result<()> {
373        let tool_name = self.get_tool_name();
374        let is_tidy_tool = tool_name == "clang-tidy";
375        let hunks_total = patch.num_hunks();
376        let mut hunks_in_patch = 0u32;
377        let file_name = file_obj
378            .name
379            .to_string_lossy()
380            .replace("\\", "/")
381            .trim_start_matches("./")
382            .to_owned();
383        let patch_buf = &patch
384            .to_buf()
385            .with_context(|| "Failed to convert patch to byte array")?
386            .to_vec();
387        review_comments.full_patch[is_tidy_tool as usize].push_str(
388            String::from_utf8(patch_buf.to_owned())
389                .with_context(|| format!("Failed to convert patch to string: {file_name}"))?
390                .as_str(),
391        );
392        review_comments.tool_total[is_tidy_tool as usize].get_or_insert(0);
393        if summary_only {
394            return Ok(());
395        }
396        for hunk_id in 0..hunks_total {
397            let (hunk, line_count) = patch.hunk(hunk_id).with_context(|| {
398                format!("Failed to get hunk {hunk_id} from patch for {file_name}")
399            })?;
400            hunks_in_patch += 1;
401            let hunk_range = file_obj.is_hunk_in_diff(&hunk);
402            if hunk_range.is_none() {
403                continue;
404            }
405            let (start_line, end_line) = hunk_range.unwrap();
406            let mut suggestion = String::new();
407            let suggestion_help = self.get_suggestion_help(start_line, end_line);
408            let mut removed = vec![];
409            for line_index in 0..line_count {
410                let diff_line = patch
411                    .line_in_hunk(hunk_id, line_index)
412                    .with_context(|| format!("Failed to get line {line_index} in a hunk {hunk_id} of patch for {file_name}"))?;
413                let line = String::from_utf8(diff_line.content().to_owned())
414                    .with_context(|| format!("Failed to convert line {line_index} buffer to string in hunk {hunk_id} of patch for {file_name}"))?;
415                if ['+', ' '].contains(&diff_line.origin()) {
416                    suggestion.push_str(line.as_str());
417                } else {
418                    removed.push(
419                        diff_line
420                            .old_lineno()
421                            .expect("Removed line should have a line number"),
422                    );
423                }
424            }
425            if suggestion.is_empty() && !removed.is_empty() {
426                suggestion.push_str(
427                    format!(
428                        "Please remove the line(s)\n- {}",
429                        removed
430                            .iter()
431                            .map(|l| l.to_string())
432                            .collect::<Vec<String>>()
433                            .join("\n- ")
434                    )
435                    .as_str(),
436                )
437            } else {
438                suggestion = format!("```suggestion\n{suggestion}```");
439            }
440            let comment = Suggestion {
441                line_start: start_line,
442                line_end: end_line,
443                suggestion: format!("{suggestion_help}\n{suggestion}"),
444                path: file_name.clone(),
445            };
446            if !review_comments.is_comment_in_suggestions(&comment) {
447                review_comments.comments.push(comment);
448            }
449        }
450        review_comments.tool_total[is_tidy_tool as usize] = Some(
451            review_comments.tool_total[is_tidy_tool as usize].unwrap_or_default() + hunks_in_patch,
452        );
453        Ok(())
454    }
455}
456
#[cfg(test)]
mod tests {
    use std::env;

    use which::which;

    use super::get_clang_tool_exe;

    const TOOL_NAME: &str = "clang-format";

    /// Tells if the executable's file name contains the expected tool name.
    fn is_expected_tool(exe: &std::path::Path) -> bool {
        exe.file_name()
            .unwrap()
            .to_string_lossy()
            .contains(TOOL_NAME)
    }

    /// The clang version to test with; taken from `CLANG_VERSION`, else `16`.
    fn clang_version() -> String {
        env::var("CLANG_VERSION").unwrap_or_else(|_| "16".to_string())
    }

    #[test]
    fn get_exe_by_version() {
        let requested = clang_version();
        let tool_exe = get_clang_tool_exe(TOOL_NAME, requested.as_str());
        println!("tool_exe: {:?}", tool_exe);
        assert!(tool_exe.is_ok_and(|val| is_expected_tool(&val)));
    }

    #[test]
    fn get_exe_by_default() {
        let tool_exe = get_clang_tool_exe(TOOL_NAME, "");
        println!("tool_exe: {:?}", tool_exe);
        assert!(tool_exe.is_ok_and(|val| is_expected_tool(&val)));
    }

    #[test]
    fn get_exe_by_path() {
        // Use the directory of the installed tool as the `version` argument.
        let installed_exe = which(TOOL_NAME).unwrap();
        let bin_path = installed_exe.parent().unwrap().to_str().unwrap();
        println!("binary exe path: {bin_path}");
        let tool_exe = get_clang_tool_exe(TOOL_NAME, bin_path);
        println!("tool_exe: {:?}", tool_exe);
        assert!(tool_exe.is_ok_and(|val| is_expected_tool(&val)));
    }

    #[test]
    fn get_exe_by_invalid_path() {
        let tool_exe = get_clang_tool_exe(TOOL_NAME, "non-existent-path");
        assert!(tool_exe.is_err());
    }

    #[test]
    fn get_exe_by_invalid_name() {
        let requested = clang_version();
        let tool_exe = get_clang_tool_exe("not-a-clang-tool", &requested);
        assert!(tool_exe.is_err());
    }
}