Skip to main content

cpp_linter/clang_tools/
clang_format.rs

//! This module holds functionality specific to running clang-format and parsing its
//! output.
3
4use std::{
5    fs,
6    ops::RangeInclusive,
7    process::Command,
8    sync::{Arc, Mutex, MutexGuard},
9};
10
11use gix_imara_diff::Diff;
12use log::Level;
13
14// project-specific crates/modules
15use crate::{
16    clang_tools::make_patch, cli::ClangParams, common_fs::FileObj, error::ClangCaptureError,
17};
18
/// The formatting advice that clang-format produced for one file.
#[derive(Clone, Debug, Default, PartialEq, Eq)]
pub struct FormatAdvice {
    /// Inclusive line ranges that clang-format wants to replace.
    ///
    /// An empty list means there is nothing to replace for the file.
    pub replacements: Vec<RangeInclusive<u32>>,
}
25
/// Produce a display name for the given `--style` value.
///
/// - `"llvm"` and `"gnu"` are returned fully upper-cased.
/// - `"google"`, `"chromium"`, `"microsoft"`, `"mozilla"` and `"webkit"` are returned
///   with only their first letter capitalized.
/// - Anything else (including differently-cased spellings of the names above) is
///   summarized as `"Custom"`.
pub fn summarize_style(style: &str) -> String {
    match style {
        "llvm" | "gnu" => style.to_ascii_uppercase(),
        "google" | "chromium" | "microsoft" | "mozilla" | "webkit" => {
            // Every name matched here is ASCII, so splitting after the first byte is safe.
            let (initial, rest) = style.split_at(1);
            initial.to_ascii_uppercase() + rest
        }
        _ => String::from("Custom"),
    }
}
40
41/// Get a total count of clang-format advice from the given list of [FileObj]s.
42pub fn tally_format_advice(files: &[Arc<Mutex<FileObj>>]) -> Result<u64, String> {
43    let mut total = 0;
44    for file in files {
45        let file = file.lock().map_err(|e| e.to_string())?;
46        if let Some(advice) = &file.format_advice
47            && !advice.replacements.is_empty()
48        {
49            total += 1;
50        }
51    }
52    Ok(total)
53}
54
55/// Run clang-format for a specific `file`, then parse and return it's XML output.
56pub fn run_clang_format(
57    file: &mut MutexGuard<FileObj>,
58    clang_params: &ClangParams,
59) -> Result<Vec<(log::Level, String)>, ClangCaptureError> {
60    let cmd_path = clang_params
61        .clang_format_command
62        .as_ref()
63        .ok_or(ClangCaptureError::ToolPathUnknown("clang-format"))?;
64    let mut cmd = Command::new(cmd_path);
65    cmd.current_dir(&clang_params.repo_root);
66    let mut logs = vec![];
67    cmd.args(["--style", &clang_params.style]);
68    let ranges = file.get_ranges(&clang_params.lines_changed_only);
69    for range in &ranges {
70        cmd.arg(format!("--lines={}:{}", range.start(), range.end()));
71    }
72    let cache_path = clang_params.get_cache_path();
73    let file_name = file.name.to_string_lossy().to_string();
74    cmd.arg(file.name.to_path_buf().as_os_str());
75    logs.push((
76        Level::Info,
77        format!(
78            "Getting format fixes with \"{} {}\"",
79            cmd.get_program().to_string_lossy(),
80            cmd.get_args()
81                .map(|a| a.to_string_lossy())
82                .collect::<Vec<_>>()
83                .join(" ")
84        ),
85    ));
86    let output = cmd
87        .output()
88        .map_err(|e| ClangCaptureError::FailedToRunCommand {
89            task: format!("get fixes from clang-format {file_name}"),
90            source: e,
91        })?;
92
93    if !output.stderr.is_empty() || !output.status.success() {
94        logs.push((
95            log::Level::Debug,
96            format!(
97                "clang-format raised the follow errors:\n{}",
98                String::from_utf8_lossy(&output.stderr)
99            ),
100        ));
101    }
102
103    // use a diff between patched and original contents to get format results
104    let original_contents =
105        fs::read_to_string(clang_params.repo_root.join(&file.name)).map_err(|e| {
106            ClangCaptureError::ReadFileFailed {
107                file_name: file_name.clone(),
108                source: e,
109            }
110        })?;
111    let patched_contents = String::from_utf8(output.stdout.to_vec()).map_err(|e| {
112        ClangCaptureError::NonUtf8Output {
113            task: "clang-format".to_string(),
114            source: e,
115        }
116    })?;
117    let (diff, _) = make_patch(&patched_contents, &original_contents);
118    let format_advice = FormatAdvice {
119        replacements: diff
120            .hunks()
121            .filter_map(|hunk| {
122                let replacement = if hunk.is_pure_insertion() {
123                    RangeInclusive::new(hunk.after.start, hunk.after.start)
124                } else {
125                    RangeInclusive::new(hunk.before.start, hunk.before.end.saturating_sub(1))
126                };
127                if ranges.is_empty() {
128                    Some(replacement)
129                } else {
130                    // only include replacements that fall within the specified line ranges
131                    if ranges.iter().any(|range| {
132                        range.contains(replacement.start()) && range.contains(replacement.end())
133                    }) {
134                        Some(replacement)
135                    } else {
136                        None
137                    }
138                }
139            })
140            .collect(),
141    };
142
143    // if a clang-tidy patched file exists in cache,
144    // get the diff between it and the original file,
145    // then format both clang-tidy fixes and any other changes by clang-format fixes.
146    if let Some(patched_path) = &file.patched_path
147        && patched_path.exists()
148    {
149        let mut cmd = Command::new(cmd_path);
150        cmd.current_dir(&cache_path);
151        // edit the clang-tody patched file in-place (`-i`)
152        cmd.args(["--style", &clang_params.style, "-i"]);
153        // if ranges is empty, then we're just formatting the entire file.
154        if !ranges.is_empty() {
155            let tidy_patch_contents = fs::read_to_string(patched_path).map_err(|e| {
156                ClangCaptureError::ReadFileFailed {
157                    file_name: patched_path.to_string_lossy().to_string(),
158                    source: e,
159                }
160            })?;
161            let (tidy_diff, _) = make_patch(&tidy_patch_contents, &original_contents);
162            let joint_ranges = three_way_diff(&ranges, tidy_diff);
163            for range in &joint_ranges {
164                cmd.arg(format!("--lines={}:{}", range.start(), range.end()).as_str());
165            }
166        }
167        cmd.arg(&file_name);
168        let output = cmd
169            .output()
170            .map_err(|e| ClangCaptureError::FailedToRunCommand {
171                task: format!("apply clang-format to clang-tidy fixes ({file_name})"),
172                source: e,
173            })?;
174        if !output.stderr.is_empty() || !output.status.success() {
175            logs.push((
176                log::Level::Debug,
177                format!(
178                    "clang-format raised the follow errors about clang-tidy fixes:\n{}",
179                    String::from_utf8_lossy(&output.stderr)
180                ),
181            ));
182        }
183    } else {
184        // clang-tidy was not run on this file,
185        // so just use the clang-format fixes as the patched content.
186        let cache_format_fixes = cache_path.join(&file.name);
187        fs::create_dir_all(
188            cache_format_fixes
189                .parent()
190                .ok_or(ClangCaptureError::UnknownCacheParentPath)?,
191        )
192        .map_err(ClangCaptureError::MkDirFailed)?;
193        fs::write(&cache_format_fixes, &output.stdout).map_err(|e| {
194            ClangCaptureError::WriteFileFailed {
195                file_name: cache_format_fixes.to_string_lossy().to_string(),
196                source: e,
197            }
198        })?;
199        file.patched_path = Some(cache_format_fixes);
200    }
201
202    file.format_advice = Some(format_advice);
203    Ok(logs)
204}
205
206/// Essentially does a three way diff without the original source that generated the given `ranges` (simplified hunks).
207///
208/// The returned list of ranges are lines that need formatting in the clang-tidy patched file,
209/// provided by the `tidy_diff`. The given `ranges` are the line numbers in the original file
210/// that clang-tidy patched.
211fn three_way_diff(ranges: &[RangeInclusive<u32>], tidy_diff: Diff) -> Vec<RangeInclusive<u32>> {
212    // We're concerned about the formatting cases:
213    //
214    // 1. changes that clang-tidy made: `tidy_diff.hunks().after`
215    // 2. changes in the current CI event's diff (`ranges`)
216    //    that clang-tidy did not touch (`tidy_diff.hunks().before`)
217    // 3. changes that do not overlap clang-tidy fixes: `ranges` - `tidy_diff.hunks().before`
218    // 4. changes that overlap with clang-tidy fixes. This one is complex because
219    //    - tidy fixes can prefix an og range
220    //    - tidy fixes can suffix an og range
221    //    - tidy fixes can be contained within an og range
222    //    - multiple tidy fixes can (in order) suffix, be contained within, and prefix an og range
223    let mut joint_ranges = vec![];
224    let mut tidy_iter = tidy_diff.hunks().peekable();
225    let mut line_shift = 0i32;
226
227    /// Prevent pure removals from causing invalid inclusive ranges.
228    fn maybe_push_range(joint_ranges: &mut Vec<RangeInclusive<u32>>, start: u32, end: u32) {
229        if start <= end {
230            joint_ranges.push(RangeInclusive::new(start, end));
231        }
232    }
233
234    for og_range in ranges {
235        let og_start = *og_range.start();
236        let og_end = *og_range.end();
237
238        // track the start and end of a merged range that gets pushed into joint_ranges.
239        let mut merged_start = (og_start as i32 + line_shift) as u32;
240        let mut merged_end = (og_end as i32 + line_shift) as u32;
241
242        while let Some(tidy_hunk) = tidy_iter.peek() {
243            // alias for readability and prevent some repeated calculations
244            let before_start = tidy_hunk.before.start;
245            let before_end = tidy_hunk.before.end.saturating_sub(1);
246            let after_start = tidy_hunk.after.start;
247            let after_end = tidy_hunk.after.end.saturating_sub(1);
248            let delta = tidy_hunk.after.len() as i32 - tidy_hunk.before.len() as i32;
249
250            // The tidy hunk is a pure removal that exactly matches the og range.
251            if tidy_hunk.is_pure_removal() && before_start == og_start && before_end == og_end {
252                // Skip the og range and tidy hunk entirely.
253                // The line shift must still be adjusted for the pure removal though
254                line_shift += delta;
255                merged_end = 0; // causes invalid inclusive range which does not get pushed.
256                tidy_iter.next(); // skip this tidy hunk
257                break; // skip og range and iterate to the next one.
258            }
259
260            // tidy hunk is before the og range.
261            if before_end < og_start {
262                maybe_push_range(&mut joint_ranges, after_start, after_end);
263                line_shift += delta;
264                tidy_iter.next();
265                continue;
266            }
267
268            // tidy hunk is after the og range.
269            if before_start > og_end {
270                // handle the og range before iterating the next tidy hunk
271                break;
272            }
273
274            // tidy hunk overlaps with the og range in some way (case 4).
275            if tidy_hunk.before.contains(&og_start) {
276                merged_start = after_start;
277            }
278
279            // commit the line shift now that the tidy hunk start is checked.
280            line_shift += delta;
281
282            // tidy hunk suffixes the og range.
283            if tidy_hunk.before.contains(&og_end) {
284                merged_end = after_end;
285                tidy_iter.next(); // this tidy hunk is handled.
286                break; // break from loop to push the merged range into joint_ranges.
287            }
288
289            // tidy hunk is contained within the og range.
290            // so adjust the og range end accordingly and continue iterating tidy hunks
291            merged_end = (og_end as i32 + line_shift) as u32;
292            tidy_iter.next();
293        }
294
295        maybe_push_range(&mut joint_ranges, merged_start, merged_end);
296    }
297
298    // handle any remaining tidy hunks that are after all og ranges.
299    for tidy_hunk in tidy_iter {
300        maybe_push_range(
301            &mut joint_ranges,
302            tidy_hunk.after.start,
303            tidy_hunk.after.end.saturating_sub(1),
304        );
305    }
306
307    joint_ranges
308}
309
#[cfg(test)]
mod tests {
    #![allow(clippy::unwrap_used)]

    use std::ops::RangeInclusive;

    use gix_imara_diff::{Algorithm, Diff, InternedInput};

    use super::{summarize_style, three_way_diff};

    /// The original source shared by the three-way diff tests.
    const OG_SRC: &str =
        "line1\nline2\nline3\nline4\nline5\nline6\nline7\nline8\nline9\nline10\nline11";

    /// Diff [`OG_SRC`] against `tidy_src`, then merge the diff with `ranges`.
    ///
    /// The diff and the resulting ranges are printed to help debugging a failed test.
    fn joint_ranges_for(
        tidy_src: &str,
        ranges: Vec<RangeInclusive<u32>>,
    ) -> Vec<RangeInclusive<u32>> {
        let input = InternedInput::new(OG_SRC, tidy_src);
        let mut tidy_diff = Diff::compute(Algorithm::Histogram, &input);
        tidy_diff.postprocess_lines(&input);
        println!("tidy diff: {tidy_diff:#?}\ncompared to og ranges: {ranges:?}");
        let joint_ranges = three_way_diff(&ranges, tidy_diff);
        println!("joint ranges: {joint_ranges:#?}");
        joint_ranges
    }

    #[test]
    fn formalize_llvm_style() {
        assert_eq!(summarize_style("llvm"), "LLVM");
    }

    #[test]
    fn formalize_google_style() {
        assert_eq!(summarize_style("google"), "Google");
    }

    #[test]
    fn formalize_custom_style() {
        assert_eq!(summarize_style("file"), "Custom");
    }

    #[test]
    fn three_way_diff_mixed() {
        // The tidy source replaces line3 with StringA, replaces line8, line9 and line10
        // with StringB and StringC, then appends StringE.
        // The later hunk covers the end of the second og range (9) but not its start (6),
        // which exercises the "tidy hunk suffixes og range" branch.
        const TIDY_SRC: &str =
            "line1\nline2\nStringA\nline4\nline5\nline6\nline7\nStringB\nStringC\nline11\nStringE";
        let joint_ranges = joint_ranges_for(TIDY_SRC, vec![2..=4, 6..=9]);
        assert_eq!(joint_ranges, vec![2..=4, 6..=10]);
    }

    #[test]
    fn three_way_diff_separated() {
        // The tidy source removes "line3" (index 2), which drops the og range [2, 2] and
        // decrements the offsets of the og range [5, 8].
        // It also appends StringE, which exercises the handling of tidy hunks that remain
        // after all og ranges are done.
        const TIDY_SRC: &str =
            "line1\nline2\nline4\nline5\nline6\nline7\nline8\nline9\nline10\nline11\nStringE";
        let joint_ranges = joint_ranges_for(TIDY_SRC, vec![2..=2, 5..=8]);
        assert_eq!(joint_ranges, vec![4..=7, 9..=10]);
    }
}