Skip to main content

big_code_analysis/output/
checkstyle.rs

1//! Checkstyle 4.3 XML writer for [`OffenderRecord`] batches.
2//!
3//! Checkstyle is the de-facto interchange format for Jenkins, SonarQube,
4//! GitLab, and most "warnings plugin" CI integrations. We emit a single
5//! XML document covering every offender, grouped by source path:
6//!
7//! ```xml
8//! <?xml version="1.0" encoding="UTF-8"?>
9//! <checkstyle version="4.3">
10//!   <file name="src/foo.rs">
11//!     <error line="42" column="5" severity="warning"
12//!            message="cyclomatic 17 exceeds limit 15"
13//!            source="big-code-analysis.cyclomatic"/>
14//!   </file>
15//! </checkstyle>
16//! ```
17//!
18//! XML escaping is hand-rolled because the surface is tiny (five
19//! entities in attribute values) and adding a new dependency is not
20//! worth it for that.
21
22#![allow(clippy::doc_markdown)]
23
24use std::collections::BTreeMap;
25use std::io::{self, Write};
26
27use crate::output::offenders::{OffenderRecord, TOOL_ID, warn_non_utf8_path};
28
29/// Write Checkstyle 4.3 XML for `offenders` to `writer`.
30///
31/// Offenders are grouped by `path` (sorted lexicographically by the
32/// UTF-8 representation; non-UTF-8 paths are skipped with a warning to
33/// stderr) so the output is deterministic and snapshot-friendly. Within
34/// a file, errors retain their input order.
35///
36/// The empty case still emits a well-formed `<checkstyle version="4.3"/>`
37/// document so consumers can rely on a non-empty file always being
38/// parseable.
39///
40/// # Errors
41///
42/// Propagates any [`io::Error`] returned by `writer` while emitting
43/// the XML envelope, the per-file `<file>` blocks, or their contained
44/// `<error>` elements.
45pub fn write_checkstyle<W: Write>(offenders: &[OffenderRecord], mut writer: W) -> io::Result<()> {
46    writer.write_all(b"<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n")?;
47
48    // Group while preserving per-file insertion order. BTreeMap key is
49    // the UTF-8 path; this also gives us deterministic file ordering.
50    let mut by_file: BTreeMap<&str, Vec<&OffenderRecord>> = BTreeMap::new();
51    for record in offenders {
52        let Some(path_str) = warn_non_utf8_path("Checkstyle", &record.path) else {
53            continue;
54        };
55        by_file.entry(path_str).or_default().push(record);
56    }
57
58    // Empty input *and* all-non-UTF-8 input both end up here with an
59    // empty `by_file`, so one branch covers both cases.
60    if by_file.is_empty() {
61        writer.write_all(b"<checkstyle version=\"4.3\"/>\n")?;
62        return Ok(());
63    }
64
65    writer.write_all(b"<checkstyle version=\"4.3\">\n")?;
66    for (path_str, records) in by_file {
67        writeln!(writer, "  <file name=\"{}\">", XmlAttr(path_str))?;
68        for record in records {
69            write_error(&mut writer, record)?;
70        }
71        writer.write_all(b"  </file>\n")?;
72    }
73    writer.write_all(b"</checkstyle>\n")
74}
75
76fn write_error<W: Write>(writer: &mut W, record: &OffenderRecord) -> io::Result<()> {
77    let message = record.default_message();
78    write!(writer, "    <error line=\"{}\"", record.start_line.max(1))?;
79    if let Some(col) = record.start_col {
80        write!(writer, " column=\"{col}\"")?;
81    }
82    writeln!(
83        writer,
84        " severity=\"{}\" message=\"{}\" source=\"{}.{}\"/>",
85        record.severity.as_str(),
86        XmlAttr(&message),
87        TOOL_ID,
88        XmlAttr(&record.metric),
89    )
90}
91
/// Format adapter that XML-escapes a string for use inside a quoted
/// attribute value.
///
/// Formatting an `XmlAttr` with `{}` writes the wrapped string with:
///
/// * the five predefined XML entities (`&`, `<`, `>`, `"`, `'`)
///   replaced by their named references;
/// * tab, line feed and carriage return passed through literally;
/// * every other character that the XML 1.0 `Char` production forbids
///   replaced by `?`. Those are the remaining C0 controls
///   (U+0000–U+001F) and the noncharacters U+FFFE and U+FFFF. The
///   replacement is lossy but predictable, and keeps the document
///   well-formed.
///
/// Surrogate code points are also outside `Char`, but a Rust `char`
/// can never hold one, so they need no handling here.
struct XmlAttr<'a>(&'a str);

impl std::fmt::Display for XmlAttr<'_> {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        use std::fmt::Write as _;
        for ch in self.0.chars() {
            match ch {
                '&' => f.write_str("&amp;")?,
                '<' => f.write_str("&lt;")?,
                '>' => f.write_str("&gt;")?,
                '"' => f.write_str("&quot;")?,
                '\'' => f.write_str("&apos;")?,
                // Tab, newline, CR are legal in attribute values and are
                // deliberately kept literal. Note that a conforming XML
                // parser normalizes each of them to a space when it
                // reads the attribute back. This arm must stay ahead of
                // the C0 range below, which overlaps it.
                '\t' | '\n' | '\r' => f.write_char(ch)?,
                // Not a legal XML 1.0 character: substitute '?' rather
                // than emit a malformed document.
                '\u{0}'..='\u{1F}' | '\u{FFFE}' | '\u{FFFF}' => f.write_char('?')?,
                other => f.write_char(other)?,
            }
        }
        Ok(())
    }
}
121
#[cfg(test)]
#[allow(
    clippy::float_cmp,
    clippy::cast_precision_loss,
    clippy::cast_possible_truncation,
    clippy::cast_sign_loss,
    clippy::similar_names,
    clippy::doc_markdown,
    clippy::needless_raw_string_hashes,
    clippy::too_many_lines
)]
mod tests {
    use super::*;
    use crate::output::offenders::Severity;
    use std::path::PathBuf;

    /// Warning-level offender in function `f`, spanning lines 42–50 and
    /// starting at column 5.
    fn rec(path: &str, metric: &str, value: f64, limit: f64) -> OffenderRecord {
        OffenderRecord {
            path: PathBuf::from(path),
            function: Some("f".into()),
            start_line: 42,
            end_line: 50,
            start_col: Some(5),
            metric: metric.into(),
            value,
            limit,
            severity: Severity::Warning,
        }
    }

    /// Warning-level offender with no function and no column, located on
    /// line 1, with value 1 and limit 0.
    fn bare(path: &str, metric: &str) -> OffenderRecord {
        OffenderRecord {
            function: None,
            start_line: 1,
            end_line: 1,
            start_col: None,
            ..rec(path, metric, 1.0, 0.0)
        }
    }

    /// Run the writer against an in-memory buffer and return the XML text.
    fn render(offenders: &[OffenderRecord]) -> String {
        let mut bytes: Vec<u8> = Vec::new();
        write_checkstyle(offenders, &mut bytes).expect("writing to Vec is infallible");
        String::from_utf8(bytes).expect("output is UTF-8")
    }

    #[test]
    fn empty_emits_self_closing_root() {
        insta::assert_snapshot!(render(&[]), @r###"
        <?xml version="1.0" encoding="UTF-8"?>
        <checkstyle version="4.3"/>
        "###);
    }

    #[test]
    fn single_offender_round_trips() {
        let xml = render(&[rec("src/foo.rs", "cyclomatic", 17.0, 15.0)]);
        insta::assert_snapshot!(xml, @r###"
        <?xml version="1.0" encoding="UTF-8"?>
        <checkstyle version="4.3">
          <file name="src/foo.rs">
            <error line="42" column="5" severity="warning" message="cyclomatic 17 exceeds limit 15" source="big-code-analysis.cyclomatic"/>
          </file>
        </checkstyle>
        "###);
    }

    #[test]
    fn multiple_files_grouped_alphabetically() {
        // Input deliberately lists `zeta` before `alpha`.
        let xml = render(&[
            rec("src/zeta.rs", "cyclomatic", 20.0, 15.0),
            rec("src/alpha.rs", "loc.lloc", 250.0, 100.0),
            rec("src/alpha.rs", "halstead.volume", 1234.5, 1000.0),
        ]);
        insta::assert_snapshot!(xml, @r###"
        <?xml version="1.0" encoding="UTF-8"?>
        <checkstyle version="4.3">
          <file name="src/alpha.rs">
            <error line="42" column="5" severity="warning" message="loc.lloc 250 exceeds limit 100" source="big-code-analysis.loc.lloc"/>
            <error line="42" column="5" severity="warning" message="halstead.volume 1234.5 exceeds limit 1000" source="big-code-analysis.halstead.volume"/>
          </file>
          <file name="src/zeta.rs">
            <error line="42" column="5" severity="warning" message="cyclomatic 20 exceeds limit 15" source="big-code-analysis.cyclomatic"/>
          </file>
        </checkstyle>
        "###);
    }

    #[test]
    fn error_severity_renders_as_error() {
        let offender = OffenderRecord {
            severity: Severity::Error,
            ..rec("a.rs", "cyclomatic", 99.0, 15.0)
        };
        let xml = render(&[offender]);
        assert!(xml.contains(r#"severity="error""#), "{xml}");
    }

    #[test]
    fn missing_column_omits_attribute() {
        let offender = OffenderRecord {
            start_col: None,
            ..rec("a.rs", "cyclomatic", 17.0, 15.0)
        };
        let xml = render(&[offender]);
        assert!(!xml.contains("column="), "{xml}");
        assert!(xml.contains(r#"line="42""#), "{xml}");
    }

    #[test]
    fn xml_special_chars_in_path_and_metric_are_escaped() {
        let xml = render(&[bare(r#"src/<a&b>"c'd.rs"#, r#"weird"&<metric>"#)]);
        assert!(
            xml.contains(r#"name="src/&lt;a&amp;b&gt;&quot;c&apos;d.rs""#),
            "{xml}"
        );
        assert!(
            xml.contains(r#"source="big-code-analysis.weird&quot;&amp;&lt;metric&gt;""#),
            "{xml}"
        );
    }

    #[test]
    fn start_line_zero_is_clamped_to_one() {
        let offender = OffenderRecord {
            start_line: 0,
            ..rec("a.rs", "cyclomatic", 17.0, 15.0)
        };
        let xml = render(&[offender]);
        assert!(xml.contains(r#"line="1""#), "{xml}");
    }

    #[test]
    fn control_characters_in_message_replaced() {
        // The metric name carries U+0001 (a C0 control) — bizarre, but
        // escaping must keep the document well-formed.
        let xml = render(&[bare("a.rs", "weird\u{0001}name")]);
        assert!(xml.contains("weird?name"), "{xml}");
    }
}
267}