Skip to main content

big_code_analysis/output/
checkstyle.rs

1//! Checkstyle 4.3 XML writer for [`OffenderRecord`] batches.
2//!
3//! Checkstyle is the de-facto interchange format for Jenkins, SonarQube,
4//! GitLab, and most "warnings plugin" CI integrations. We emit a single
5//! XML document covering every offender, grouped by source path:
6//!
7//! ```xml
8//! <?xml version="1.0" encoding="UTF-8"?>
9//! <checkstyle version="4.3">
10//!   <file name="src/foo.rs">
11//!     <error line="42" column="5" severity="warning"
12//!            message="cyclomatic 17 exceeds limit 15"
13//!            source="big-code-analysis.cyclomatic"/>
14//!   </file>
15//! </checkstyle>
16//! ```
17//!
18//! XML escaping is hand-rolled because the surface is tiny (five
19//! entities in attribute values) and adding a new dependency is not
20//! worth it for that.
21
22#![allow(clippy::doc_markdown)]
23
24use std::collections::BTreeMap;
25use std::io::{self, Write};
26
27use crate::output::offenders::{OffenderRecord, TOOL_ID, warn_non_utf8_path};
28
29/// Write Checkstyle 4.3 XML for `offenders` to `writer`.
30///
31/// Offenders are grouped by `path` (sorted lexicographically by the
32/// UTF-8 representation; non-UTF-8 paths are skipped with a warning to
33/// stderr) so the output is deterministic and snapshot-friendly. Within
34/// a file, errors retain their input order.
35///
36/// The empty case still emits a well-formed `<checkstyle version="4.3"/>`
37/// document so consumers can rely on a non-empty file always being
38/// parseable.
39///
40/// # Errors
41///
42/// Propagates any [`io::Error`] returned by `writer` while emitting
43/// the XML envelope, the per-file `<file>` blocks, or their contained
44/// `<error>` elements.
45pub fn write_checkstyle<W: Write>(offenders: &[OffenderRecord], mut writer: W) -> io::Result<()> {
46    writer.write_all(b"<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n")?;
47
48    // Group while preserving per-file insertion order. BTreeMap key is
49    // the UTF-8 path; this also gives us deterministic file ordering.
50    let mut by_file: BTreeMap<&str, Vec<&OffenderRecord>> = BTreeMap::new();
51    for record in offenders {
52        let Some(path_str) = warn_non_utf8_path("Checkstyle", &record.path) else {
53            continue;
54        };
55        by_file.entry(path_str).or_default().push(record);
56    }
57
58    // Empty input *and* all-non-UTF-8 input both end up here with an
59    // empty `by_file`, so one branch covers both cases.
60    if by_file.is_empty() {
61        writer.write_all(b"<checkstyle version=\"4.3\"/>\n")?;
62        return Ok(());
63    }
64
65    writer.write_all(b"<checkstyle version=\"4.3\">\n")?;
66    for (path_str, records) in by_file {
67        writeln!(writer, "  <file name=\"{}\">", XmlAttr(path_str))?;
68        for record in records {
69            write_error(&mut writer, record)?;
70        }
71        writer.write_all(b"  </file>\n")?;
72    }
73    writer.write_all(b"</checkstyle>\n")
74}
75
76fn write_error<W: Write>(writer: &mut W, record: &OffenderRecord) -> io::Result<()> {
77    let message = record.default_message();
78    write!(writer, "    <error line=\"{}\"", record.start_line.max(1))?;
79    if let Some(col) = record.start_col {
80        write!(writer, " column=\"{col}\"")?;
81    }
82    writeln!(
83        writer,
84        " severity=\"{}\" message=\"{}\" source=\"{}.{}\"/>",
85        record.severity.as_str(),
86        XmlAttr(&message),
87        TOOL_ID,
88        XmlAttr(&record.metric),
89    )
90}
91
/// Format adapter that XML-escapes attribute values. We escape the five
/// XML predefined entities, emit numeric character references for TAB
/// / LF / CR (which XML 1.0 §3.3.3 attribute-value normalization would
/// otherwise collapse to a single space), and replace every character
/// that lies outside the XML 1.0 `Char` production (§2.2) with `?` so
/// the output stays a well-formed XML 1.0 document (lossy but
/// predictable). For a Rust `char` — which can never be a surrogate —
/// the excluded characters are the remaining C0 controls and the two
/// noncharacters U+FFFE / U+FFFF.
struct XmlAttr<'a>(&'a str);

impl std::fmt::Display for XmlAttr<'_> {
    /// Writes the wrapped string with attribute-value escaping applied,
    /// one character at a time; propagates the formatter's error.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Unify every per-char escape behind a single `f.write_str` so the
        // `?` operator fires once per iteration instead of once per arm —
        // each `?` is a counted exit on the per-function `nexits` budget.
        // The 4-byte stack buffer covers every UTF-8 scalar; `encode_utf8`
        // borrows from it for the default arm so all arms unify as `&str`.
        let mut buf = [0u8; 4];
        for ch in self.0.chars() {
            let escaped: &str = match ch {
                '&' => "&amp;",
                '<' => "&lt;",
                '>' => "&gt;",
                '"' => "&quot;",
                '\'' => "&apos;",
                // XML 1.0 §3.3.3 mandates attribute-value normalization:
                // a conforming parser collapses literal TAB / LF / CR
                // bytes inside an attribute value to a single space on
                // read. To round-trip these characters intact (POSIX
                // paths may contain newlines, and future message
                // templates may span lines), emit numeric character
                // references — they are exempt from normalization.
                '\t' => "&#x9;",
                '\n' => "&#xA;",
                '\r' => "&#xD;",
                // Everything else that XML 1.0 §2.2 forbids. TAB / LF /
                // CR were matched above, so the range only catches the
                // other C0 controls. U+FFFE and U+FFFF are valid Rust
                // `char`s (and valid UTF-8 in file names) but are not
                // XML `Char`s, not even as character references.
                '\u{0}'..='\u{1F}' | '\u{FFFE}' | '\u{FFFF}' => "?",
                c => c.encode_utf8(&mut buf),
            };
            f.write_str(escaped)?;
        }
        Ok(())
    }
}
133
#[cfg(test)]
#[allow(
    clippy::float_cmp,
    clippy::cast_precision_loss,
    clippy::cast_possible_truncation,
    clippy::cast_sign_loss,
    clippy::similar_names,
    clippy::doc_markdown,
    clippy::needless_raw_string_hashes,
    clippy::too_many_lines
)]
mod tests {
    use super::*;
    use crate::output::offenders::Severity;
    use std::path::PathBuf;

    /// A typical offender: warning severity, inside function `f`,
    /// spanning lines 42–50 and starting at column 5.
    fn rec(path: &str, metric: &str, value: f64, limit: f64) -> OffenderRecord {
        OffenderRecord {
            path: path.into(),
            function: Some("f".into()),
            start_line: 42,
            end_line: 50,
            start_col: Some(5),
            metric: metric.into(),
            value,
            limit,
            severity: Severity::Warning,
        }
    }

    /// A stripped-down offender for the escaping tests: no function, no
    /// column, line 1, value 1 against limit 0, warning severity.
    fn bare(path: &str, metric: &str) -> OffenderRecord {
        OffenderRecord {
            function: None,
            start_line: 1,
            end_line: 1,
            start_col: None,
            ..rec(path, metric, 1.0, 0.0)
        }
    }

    /// Run the writer against an in-memory buffer and hand back the text.
    fn render(offenders: &[OffenderRecord]) -> String {
        let mut sink = Vec::new();
        write_checkstyle(offenders, &mut sink).expect("writing to Vec is infallible");
        String::from_utf8(sink).expect("output is UTF-8")
    }

    #[test]
    fn empty_emits_self_closing_root() {
        insta::assert_snapshot!(render(&[]), @r###"
        <?xml version="1.0" encoding="UTF-8"?>
        <checkstyle version="4.3"/>
        "###);
    }

    #[test]
    fn single_offender_round_trips() {
        let offenders = [rec("src/foo.rs", "cyclomatic", 17.0, 15.0)];
        insta::assert_snapshot!(render(&offenders), @r###"
        <?xml version="1.0" encoding="UTF-8"?>
        <checkstyle version="4.3">
          <file name="src/foo.rs">
            <error line="42" column="5" severity="warning" message="cyclomatic 17 exceeds limit 15" source="big-code-analysis.cyclomatic"/>
          </file>
        </checkstyle>
        "###);
    }

    #[test]
    fn multiple_files_grouped_alphabetically() {
        // Deliberately out of order: `zeta` first, then two `alpha`
        // records whose relative order must survive the grouping.
        let offenders = [
            rec("src/zeta.rs", "cyclomatic", 20.0, 15.0),
            rec("src/alpha.rs", "loc.lloc", 250.0, 100.0),
            rec("src/alpha.rs", "halstead.volume", 1234.5, 1000.0),
        ];
        insta::assert_snapshot!(render(&offenders), @r###"
        <?xml version="1.0" encoding="UTF-8"?>
        <checkstyle version="4.3">
          <file name="src/alpha.rs">
            <error line="42" column="5" severity="warning" message="loc.lloc 250 exceeds limit 100" source="big-code-analysis.loc.lloc"/>
            <error line="42" column="5" severity="warning" message="halstead.volume 1234.5 exceeds limit 1000" source="big-code-analysis.halstead.volume"/>
          </file>
          <file name="src/zeta.rs">
            <error line="42" column="5" severity="warning" message="cyclomatic 20 exceeds limit 15" source="big-code-analysis.cyclomatic"/>
          </file>
        </checkstyle>
        "###);
    }

    #[test]
    fn error_severity_renders_as_error() {
        let offender = OffenderRecord {
            severity: Severity::Error,
            ..rec("a.rs", "cyclomatic", 99.0, 15.0)
        };
        let out = render(&[offender]);
        assert!(out.contains(r#"severity="error""#), "{out}");
    }

    #[test]
    fn missing_column_omits_attribute() {
        let offender = OffenderRecord {
            start_col: None,
            ..rec("a.rs", "cyclomatic", 17.0, 15.0)
        };
        let out = render(&[offender]);
        assert!(!out.contains("column="), "{out}");
        assert!(out.contains(r#"line="42""#), "{out}");
    }

    #[test]
    fn xml_special_chars_in_path_and_metric_are_escaped() {
        let out = render(&[bare(r#"src/<a&b>"c'd.rs"#, r#"weird"&<metric>"#)]);
        assert!(
            out.contains(r#"name="src/&lt;a&amp;b&gt;&quot;c&apos;d.rs""#),
            "{out}"
        );
        assert!(
            out.contains(r#"source="big-code-analysis.weird&quot;&amp;&lt;metric&gt;""#),
            "{out}"
        );
    }

    #[test]
    fn start_line_zero_is_clamped_to_one() {
        let offender = OffenderRecord {
            start_line: 0,
            ..rec("a.rs", "cyclomatic", 17.0, 15.0)
        };
        let out = render(&[offender]);
        assert!(out.contains(r#"line="1""#), "{out}");
    }

    #[test]
    fn control_characters_in_message_replaced() {
        // The metric name carries U+0001 (SOH) — bizarre, but the
        // escaper must still keep the document well-formed.
        let out = render(&[bare("a.rs", "weird\u{0001}name")]);
        assert!(out.contains("weird?name"), "{out}");
    }

    #[test]
    fn whitespace_in_attribute_round_trips_via_numeric_refs() {
        use quick_xml::events::Event;
        use quick_xml::reader::Reader;

        // Per XML 1.0 §3.3.3 a conforming parser turns each raw TAB,
        // LF or CR inside an attribute value into a space. Since POSIX
        // file names may legally contain '\n', writing those bytes
        // literally would silently corrupt the path of every offender
        // in such a file. Numeric character references are not subject
        // to that normalization, so the value a parser sees is the
        // value we wrote.
        const WEIRD_PATH: &str = "src/weird\npath\twith\rwhitespace.rs";
        let out = render(&[bare(WEIRD_PATH, "cyclomatic")]);

        // Emitter side: each of the three whitespace characters must
        // show up as a numeric character reference.
        assert!(out.contains("&#xA;"), "missing &#xA; (LF) in {out}");
        assert!(out.contains("&#x9;"), "missing &#x9; (TAB) in {out}");
        assert!(out.contains("&#xD;"), "missing &#xD; (CR) in {out}");
        // ...and the text between the quotes of the first `name="..."`
        // must be free of raw LF / TAB / CR, which normalization would
        // otherwise flatten to spaces on read.
        let (_, after_open) = out.split_once("name=\"").expect("name attribute present");
        let (attr_lit, _) = after_open.split_once('"').expect("name attribute closed");
        assert!(
            !attr_lit.chars().any(|c| matches!(c, '\n' | '\t' | '\r')),
            "raw whitespace leaked into attribute literal: {attr_lit:?}"
        );

        // Parser side: feed the document to quick-xml and check that
        // the decoded `name` value holds the original raw characters.
        let mut reader = Reader::from_str(&out);
        let mut scratch = Vec::new();
        let mut roundtripped: Option<String> = None;
        loop {
            match reader
                .read_event_into(&mut scratch)
                .expect("well-formed XML")
            {
                Event::Start(start) | Event::Empty(start) if start.name().as_ref() == b"file" => {
                    for attr in start
                        .attributes()
                        .with_checks(false)
                        .flatten()
                        .filter(|attr| attr.key.as_ref() == b"name")
                    {
                        let decoded = attr.unescape_value().expect("attribute value decodes");
                        roundtripped = Some(decoded.into_owned());
                    }
                }
                Event::Eof => break,
                _ => {}
            }
            scratch.clear();
        }
        let roundtripped = roundtripped.expect("found <file name=...>");
        assert_eq!(roundtripped, "src/weird\npath\twith\rwhitespace.rs");
    }

    #[test]
    fn predefined_entities_still_escape_after_whitespace_fix() {
        // Regression guard: the dedicated TAB/LF/CR arms must leave the
        // five predefined-entity escapes exactly as this format has
        // always emitted them.
        let out = render(&[bare("a&b<c>d\"e'f.rs", "cyclomatic")]);
        assert!(
            out.contains(r#"name="a&amp;b&lt;c&gt;d&quot;e&apos;f.rs""#),
            "predefined-entity escapes regressed: {out}"
        );
    }
}