Skip to main content

fallow_cli/report/ci/
pr_comment.rs

1use std::fmt::Write as _;
2use std::process::ExitCode;
3use std::sync::OnceLock;
4
5use serde_json::Value;
6
/// Process-wide workspace suffix for the sticky-comment marker.
///
/// Written through `set_workspace_marker_from_list` and read by
/// `sticky_marker_id`, which appends a sanitised form of it to the default
/// marker id. Giving each workspace selection its own marker is what keeps
/// parallel per-workspace jobs from overwriting each other's sticky comment
/// on the same PR/MR.
///
/// A `OnceLock` accepts only the first `set`; later attempts are rejected
/// (and the setter deliberately ignores that rejection). The value is stored
/// as provided (a trimmed name or `w-<hash>`); sanitising for the
/// HTML-comment marker happens on the read side.
static WORKSPACE_MARKER: OnceLock<String> = OnceLock::new();
16
17/// Set the workspace marker from a `--workspace` selection list.
18///
19/// Single workspace -> the name itself, sanitised for marker grammar.
20/// N>1 workspaces -> a stable 6-char hex hash of the sorted, comma-joined
21/// list, prefixed with `w-`. Sort + join is deterministic so the same
22/// selection produces the same suffix across runs; two jobs with disjoint
23/// selections get distinct markers and don't race.
24#[allow(
25    dead_code,
26    reason = "called from main.rs bin target; lib target sees no caller"
27)]
28pub fn set_workspace_marker_from_list(values: &[String]) {
29    let trimmed: Vec<&str> = values
30        .iter()
31        .map(|value| value.trim())
32        .filter(|value| !value.is_empty())
33        .collect();
34    if trimmed.is_empty() {
35        return;
36    }
37    let marker = if let [single] = trimmed.as_slice() {
38        (*single).to_owned()
39    } else {
40        let mut sorted = trimmed.iter().map(|s| (*s).to_owned()).collect::<Vec<_>>();
41        sorted.sort();
42        let joined = sorted.join(",");
43        format!("w-{}", short_hex_hash(&joined))
44    };
45    let _ = WORKSPACE_MARKER.set(marker);
46}
47
/// Hash `value` with 64-bit FNV-1a and return the low 24 bits as exactly six
/// lowercase hex digits (zero-padded).
///
/// The digest depends only on the input bytes, so it is stable across runs
/// and toolchains. 24 bits give 16,777,216 possible suffixes, which is ample
/// for telling a handful of workspace selections apart.
fn short_hex_hash(value: &str) -> String {
    const OFFSET_BASIS: u64 = 0xcbf2_9ce4_8422_2325;
    const PRIME: u64 = 0x0100_0000_01b3;
    const LOW_24_BITS: u64 = 0x00ff_ffff;

    // FNV-1a: xor the byte in first, then multiply by the prime.
    let digest = value.bytes().fold(OFFSET_BASIS, |acc, byte| {
        (acc ^ u64::from(byte)).wrapping_mul(PRIME)
    });
    format!("{:06x}", digest & LOW_24_BITS)
}
59
/// CI host whose PR/MR comment format is being produced.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Provider {
    /// GitHub.
    Github,
    /// GitLab.
    Gitlab,
}

impl Provider {
    /// Human-readable brand name, spelled with the provider's own
    /// capitalisation, for use in rendered comment text.
    #[must_use]
    pub const fn name(self) -> &'static str {
        match self {
            Self::Gitlab => "GitLab",
            Self::Github => "GitHub",
        }
    }
}
75
/// One finding extracted from a Code Climate report, reduced to the fields
/// the PR/MR comment renderer needs.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct CiIssue {
    /// Rule identifier, taken from the report's `check_name`.
    pub rule_id: String,
    /// Human-readable explanation of the finding.
    pub description: String,
    /// Severity label as it appears in the report (e.g. `minor`).
    pub severity: String,
    /// File the finding points at (`location.path` in the report).
    pub path: String,
    /// Line of the finding (`location.lines.begin` in the report).
    pub line: u64,
    /// Report-provided fingerprint; used as the final sort tie-breaker.
    pub fingerprint: String,
}
85
86#[must_use]
87pub fn issues_from_codeclimate(value: &Value) -> Vec<CiIssue> {
88    let mut issues = value
89        .as_array()
90        .into_iter()
91        .flatten()
92        .filter_map(issue_from_codeclimate)
93        .collect::<Vec<_>>();
94    issues
95        .sort_by(|a, b| (&a.path, a.line, &a.fingerprint).cmp(&(&b.path, b.line, &b.fingerprint)));
96    issues
97}
98
99fn issue_from_codeclimate(value: &Value) -> Option<CiIssue> {
100    let path = value.pointer("/location/path")?.as_str()?.to_string();
101    let line = value
102        .pointer("/location/lines/begin")
103        .and_then(Value::as_u64)
104        .unwrap_or(1);
105    Some(CiIssue {
106        rule_id: value
107            .get("check_name")
108            .and_then(Value::as_str)
109            .unwrap_or("fallow/finding")
110            .to_string(),
111        description: value
112            .get("description")
113            .and_then(Value::as_str)
114            .unwrap_or("Fallow finding")
115            .to_string(),
116        severity: value
117            .get("severity")
118            .and_then(Value::as_str)
119            .unwrap_or("minor")
120            .to_string(),
121        fingerprint: value
122            .get("fingerprint")
123            .and_then(Value::as_str)
124            .unwrap_or("")
125            .to_string(),
126        path,
127        line,
128    })
129}
130
131#[must_use]
132pub fn render_pr_comment(command: &str, provider: Provider, issues: &[CiIssue]) -> String {
133    let marker_id = sticky_marker_id();
134    let marker = format!("<!-- fallow-id: {marker_id} -->");
135    let max = max_comments();
136    let title = command_title(command);
137    let count = issues.len();
138    let noun = if count == 1 { "finding" } else { "findings" };
139
140    let mut out = String::new();
141    out.push_str(&marker);
142    out.push('\n');
143    write!(&mut out, "### Fallow {title}\n\n").expect("write to string");
144    if count == 0 {
145        writeln!(
146            &mut out,
147            "No {provider} PR/MR findings.",
148            provider = provider.name()
149        )
150        .expect("write to string");
151    } else {
152        write!(&mut out, "Found **{count}** {noun}.\n\n").expect("write to string");
153        let groups = group_by_category(issues);
154        // Single-category invocations (e.g. `fallow check --format pr-comment-github`)
155        // get the original flat-table shape. Combined / multi-category runs get
156        // one collapsible section per category so reviewers can fold by area.
157        if groups.len() == 1 {
158            render_findings_table(&mut out, issues, max, "Details");
159        } else {
160            for (category, group_issues) in &groups {
161                let summary_label = summary_label(category, group_issues.len(), max);
162                render_findings_table(&mut out, group_issues, max, &summary_label);
163            }
164        }
165    }
166    out.push_str("\nGenerated by fallow.");
167    out
168}
169
/// Label for one category's `<details>` summary line.
///
/// The label is `Category (total)`. When the section holds more findings
/// than the `max` rows that will be shown, it becomes
/// `Category (total, showing max)` so the reader knows about the truncation
/// before expanding the section.
fn summary_label(category: &str, total: usize, max: usize) -> String {
    let truncation_note = if total > max {
        format!(", showing {max}")
    } else {
        String::new()
    };
    format!("{category} ({total}{truncation_note})")
}
182
183fn render_findings_table(out: &mut String, issues: &[CiIssue], max: usize, summary: &str) {
184    writeln!(out, "<details>\n<summary>{summary}</summary>\n").expect("write to string");
185    out.push_str("| Severity | Rule | Location | Description |\n");
186    out.push_str("| --- | --- | --- | --- |\n");
187    for issue in issues.iter().take(max) {
188        writeln!(
189            out,
190            "| {} | `{}` | `{}`:{} | {} |",
191            escape_md(&issue.severity),
192            escape_md(&issue.rule_id),
193            escape_md(&issue.path),
194            issue.line,
195            escape_md(&issue.description),
196        )
197        .expect("write to string");
198    }
199    if issues.len() > max {
200        writeln!(
201            out,
202            "\nShowing {max} of {} findings. Run fallow locally or inspect the CI output for the full report.",
203            issues.len(),
204        )
205        .expect("write to string");
206    }
207    out.push_str("\n</details>\n\n");
208}
209
210/// Map a fallow rule id to its category for sticky-comment grouping.
211///
212/// Single source of truth lives on `RuleDef::category` in `explain.rs`. This
213/// helper does the lookup so callers don't need to know about the registry;
214/// the look-up-then-fallback shape also keeps the renderer working for
215/// rules a downstream consumer added without registering (rare; produces
216/// the conservative "Dead code" default).
217#[must_use]
218pub fn category_for_rule(rule_id: &str) -> &'static str {
219    crate::explain::rule_by_id(rule_id).map_or("Dead code", |def| def.category)
220}
221
222/// Stable category ordering for the sticky comment. Reviewers see categories
223/// in the same order across PRs / runs, which matters for muscle memory.
224const CATEGORY_ORDER: [&str; 6] = [
225    "Dead code",
226    "Dependencies",
227    "Duplication",
228    "Health",
229    "Architecture",
230    "Suppressions",
231];
232
233fn group_by_category(issues: &[CiIssue]) -> Vec<(&'static str, Vec<CiIssue>)> {
234    let mut buckets: std::collections::BTreeMap<&'static str, Vec<CiIssue>> =
235        std::collections::BTreeMap::new();
236    for issue in issues {
237        let category = category_for_rule(&issue.rule_id);
238        buckets.entry(category).or_default().push(issue.clone());
239    }
240    let mut ordered: Vec<(&'static str, Vec<CiIssue>)> = Vec::with_capacity(buckets.len());
241    // Emit known categories in the declared order first.
242    for category in CATEGORY_ORDER {
243        if let Some(items) = buckets.remove(category) {
244            ordered.push((category, items));
245        }
246    }
247    // Anything left over (future categories not yet ordered) goes after.
248    for (category, items) in buckets {
249        ordered.push((category, items));
250    }
251    ordered
252}
253
/// Maximum number of table rows to render per section.
///
/// Read from the `FALLOW_MAX_COMMENTS` environment variable; falls back to
/// 50 when the variable is unset, not valid Unicode, or not parseable as a
/// `usize`.
fn max_comments() -> usize {
    const DEFAULT_LIMIT: usize = 50;
    match std::env::var("FALLOW_MAX_COMMENTS") {
        Ok(raw) => raw.parse::<usize>().unwrap_or(DEFAULT_LIMIT),
        Err(_) => DEFAULT_LIMIT,
    }
}
260
261/// Compute the sticky-comment marker id. Precedence (highest first):
262///
263/// 1. `FALLOW_COMMENT_ID` set by the user explicitly: use as-is.
264/// 2. `WORKSPACE_MARKER` populated by `main()` from `--workspace <name>`:
265///    suffix the default to avoid colliding with a sibling per-workspace
266///    job's sticky on the same PR/MR.
267/// 3. Plain `fallow-results`.
268///
269/// The collision case (2) is the common monorepo shape: parallel jobs each
270/// run fallow scoped to one workspace package and post their own sticky.
271/// Without a per-workspace suffix every job edits the same marker, racing
272/// each other's bodies on every CI re-run.
273fn sticky_marker_id() -> String {
274    if let Ok(value) = std::env::var("FALLOW_COMMENT_ID")
275        && !value.trim().is_empty()
276    {
277        return value;
278    }
279    let suffix = WORKSPACE_MARKER
280        .get()
281        .map(|value| value.trim())
282        .filter(|value| !value.is_empty())
283        .map(sanitize_marker_segment);
284    match suffix {
285        Some(workspace) => format!("fallow-results-{workspace}"),
286        None => "fallow-results".to_owned(),
287    }
288}
289
/// Reduce `value` to characters that are safe inside the
/// `<!-- fallow-id: <id> -->` marker.
///
/// ASCII letters, ASCII digits, `-`, `_` and `.` are kept; every other
/// character (including `<`, `>`, `/`, `@`, whitespace and all non-ASCII) is
/// replaced by a single `-`. Leading and trailing dashes are then removed.
/// Dashes inside the value are left alone, so adjacent replaced characters
/// produce a run of dashes (`a//b` -> `a--b`). Because `>` is never kept,
/// the result cannot contain the `-->` comment terminator.
///
/// The result is empty when no kept character other than `-` is present.
fn sanitize_marker_segment(value: &str) -> String {
    let mut cleaned = String::with_capacity(value.len());
    for ch in value.chars() {
        let keep = matches!(ch, 'a'..='z' | 'A'..='Z' | '0'..='9' | '-' | '_' | '.');
        cleaned.push(if keep { ch } else { '-' });
    }
    cleaned.trim_matches('-').to_owned()
}
308
309#[must_use]
310pub fn print_pr_comment(command: &str, provider: Provider, codeclimate: &Value) -> ExitCode {
311    let issues = super::diff_filter::filter_issues_from_env(issues_from_codeclimate(codeclimate));
312    println!("{}", render_pr_comment(command, provider, &issues));
313    ExitCode::SUCCESS
314}
315
/// Title fragment used in the comment heading (`### Fallow <title>`) for a
/// given fallow sub-command.
///
/// `check` is treated as an alias of `dead-code`, and the empty string as an
/// alias of `combined`. Commands not listed get the generic `report`.
#[must_use]
pub fn command_title(command: &str) -> &'static str {
    const TITLES: [(&str, &str); 7] = [
        ("dead-code", "dead-code report"),
        ("check", "dead-code report"),
        ("dupes", "duplication report"),
        ("health", "health report"),
        ("audit", "audit report"),
        ("", "combined report"),
        ("combined", "combined report"),
    ];
    TITLES
        .iter()
        .find(|entry| entry.0 == command)
        .map_or("report", |entry| entry.1)
}
327
/// Escape a string for inclusion in a Markdown table cell.
///
/// Table cells render through GitHub-Flavored Markdown and GitLab Flavored
/// Markdown as inline content, so cell-internal markers can flip the cell to
/// emphasis, link, image, code, HTML, or strikethrough.
///
/// Line endings collapse to a single space each, because a literal line
/// ending terminates the table row. All three forms count: `\n`, `\r\n`
/// (one space, not two) and a lone `\r`, which CommonMark also treats as a
/// line ending. Leading and trailing whitespace is trimmed from the result.
///
/// Each of the following characters is prefixed with a backslash:
///
/// - `\` (escape character itself)
/// - `` ` `` (inline code)
/// - `*` `_` (emphasis / strong)
/// - `[` `]` `(` `)` (link / image syntax)
/// - `!` (image when followed by `[`)
/// - `<` `>` (raw HTML / autolinks)
/// - `#` (heading marker when first character of the cell)
/// - `|` (table cell separator)
/// - `~` (strikethrough on GFM)
/// - `&` (HTML numeric / named entity decode: `&#42;` would otherwise
///   render as `*` after our escape and reintroduce the bypass)
///
/// Line-start markers (`.`, `-`, `+`, `1.`) are intentionally NOT escaped:
/// they are only meaningful at the start of a block-level line, and table
/// cells render as inline content where these are inert. Escaping them
/// produces visually noisy output (`fallow/test\-only-dep`) without
/// correctness benefit.
///
/// The output is only valid as regular inline text. Do not place it inside a
/// code span: backslash escapes are not interpreted there. The function is
/// not idempotent; a second pass escapes the backslashes added by the first.
#[must_use]
pub fn escape_md(value: &str) -> String {
    let mut out = String::with_capacity(value.len());
    let mut after_cr = false;
    for ch in value.chars() {
        let follows_cr = after_cr;
        after_cr = ch == '\r';
        match ch {
            // Second half of a CRLF pair: the `\r` already produced the space.
            '\n' if follows_cr => {}
            '\r' | '\n' => out.push(' '),
            '\\' | '`' | '*' | '_' | '[' | ']' | '(' | ')' | '!' | '<' | '>' | '#' | '|'
            | '~' | '&' => {
                out.push('\\');
                out.push(ch);
            }
            _ => out.push(ch),
        }
    }
    out.trim().to_owned()
}
381
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn extracts_issues_from_codeclimate() {
        let value = serde_json::json!([{
            "check_name": "fallow/unused-export",
            "description": "Export x is never imported",
            "severity": "minor",
            "fingerprint": "abc",
            "location": { "path": "src/a.ts", "lines": { "begin": 7 } }
        }]);
        let issues = issues_from_codeclimate(&value);
        assert_eq!(
            issues,
            vec![CiIssue {
                rule_id: "fallow/unused-export".to_owned(),
                description: "Export x is never imported".to_owned(),
                severity: "minor".to_owned(),
                path: "src/a.ts".to_owned(),
                line: 7,
                fingerprint: "abc".to_owned(),
            }]
        );
    }

    #[test]
    fn codeclimate_entries_are_defaulted_filtered_and_sorted() {
        let value = serde_json::json!([
            { "location": { "path": "src/b.ts" } },
            { "check_name": "fallow/unused-file" },
            { "fingerprint": "z", "location": { "path": "src/a.ts", "lines": { "begin": 9 } } },
            { "fingerprint": "a", "location": { "path": "src/a.ts", "lines": { "begin": 9 } } },
            { "location": { "path": "src/a.ts", "lines": { "begin": 2 } } }
        ]);
        let issues = issues_from_codeclimate(&value);

        // The entry without a path is dropped; the rest sort by path, then
        // line, then fingerprint.
        let order: Vec<(&str, u64, &str)> = issues
            .iter()
            .map(|issue| (issue.path.as_str(), issue.line, issue.fingerprint.as_str()))
            .collect();
        assert_eq!(
            order,
            vec![
                ("src/a.ts", 2, ""),
                ("src/a.ts", 9, "a"),
                ("src/a.ts", 9, "z"),
                ("src/b.ts", 1, ""),
            ]
        );

        // Missing optional fields fall back to their defaults.
        let defaulted = &issues[3];
        assert_eq!(defaulted.rule_id, "fallow/finding");
        assert_eq!(defaulted.description, "Fallow finding");
        assert_eq!(defaulted.severity, "minor");

        // A report that is not an array yields no issues.
        assert!(issues_from_codeclimate(&serde_json::json!({ "not": "an array" })).is_empty());
    }

    #[test]
    fn empty_report_renders_marker_and_no_findings_line() {
        // WORKSPACE_MARKER is a set-once OnceLock shared by the whole test
        // process, so a workspace suffix may or may not follow the default
        // id depending on what ran earlier. Only the prefix is asserted,
        // which holds in both cases and keeps the test order-independent.
        let body = render_pr_comment("check", Provider::Github, &[]);
        assert!(body.contains("<!-- fallow-id: fallow-results"));
        assert!(body.contains("### Fallow dead-code report"));
        assert!(body.contains("No GitHub PR/MR findings."));
        assert!(body.ends_with("\nGenerated by fallow."));
    }

    #[test]
    fn single_finding_renders_flat_details_table() {
        let issues = [CiIssue {
            rule_id: "fallow/unused-export".to_owned(),
            description: "Export x is never imported".to_owned(),
            severity: "minor".to_owned(),
            path: "src/a.ts".to_owned(),
            line: 7,
            fingerprint: "abc".to_owned(),
        }];
        let body = render_pr_comment("check", Provider::Gitlab, &issues);
        // Singular noun for exactly one finding.
        assert!(body.contains("Found **1** finding.\n"), "got: {body}");
        // One category -> the flat `Details` section.
        assert!(body.contains("<summary>Details</summary>"), "got: {body}");
        assert!(
            body.contains(
                "| minor | `fallow/unused-export` | `src/a.ts`:7 | Export x is never imported |"
            ),
            "got: {body}"
        );
    }

    #[test]
    fn short_hex_hash_is_deterministic_and_six_chars() {
        let a = short_hex_hash("api,worker");
        assert_eq!(a.len(), 6);
        // Same input -> same hash across calls.
        assert_eq!(a, short_hex_hash("api,worker"));
        // Different input -> different hash for this pair.
        assert_ne!(a, short_hex_hash("admin,web"));
        // Pinned values: low 24 bits of the 64-bit FNV-1a offset basis for
        // the empty input, and zero-padding for a digest below 0x100000.
        assert_eq!(short_hex_hash(""), "222325");
        assert_eq!(short_hex_hash("a"), "01ec8c");
    }

    #[test]
    fn sanitize_marker_segment_replaces_unsafe_chars_with_dashes() {
        // `@`, `/`, spaces, and other special chars all become `-`.
        // Leading and trailing dashes are trimmed.
        assert_eq!(sanitize_marker_segment("@fallow/runtime"), "fallow-runtime");
        assert_eq!(
            sanitize_marker_segment("packages/web ui"),
            "packages-web-ui"
        );
        assert_eq!(sanitize_marker_segment("plain"), "plain");
        assert_eq!(
            sanitize_marker_segment("--leading-trailing--"),
            "leading-trailing"
        );
        // Interior runs are kept as-is; only the ends are trimmed.
        assert_eq!(sanitize_marker_segment("a//b"), "a--b");
        // Nothing safe in the input -> empty segment.
        assert_eq!(sanitize_marker_segment("@/"), "");
    }

    #[test]
    fn command_title_maps_aliases_and_unknown_commands() {
        assert_eq!(command_title("dead-code"), "dead-code report");
        assert_eq!(command_title("check"), "dead-code report");
        assert_eq!(command_title("dupes"), "duplication report");
        assert_eq!(command_title("health"), "health report");
        assert_eq!(command_title("audit"), "audit report");
        assert_eq!(command_title(""), "combined report");
        assert_eq!(command_title("combined"), "combined report");
        assert_eq!(command_title("unknown"), "report");
    }

    #[test]
    fn escape_md_escapes_inline_commonmark_specials() {
        // Inline-context CommonMark specials must escape: emphasis, links,
        // images, code, HTML, headings (when first char of cell), pipes,
        // strikethrough.
        let raw = "foo*bar_baz [a](u) `c` <h> #x !i ~s | p";
        let escaped = escape_md(raw);
        for ch in [
            '*', '_', '[', ']', '(', ')', '`', '<', '>', '#', '!', '~', '|',
        ] {
            let raw_count = raw.chars().filter(|c| c == &ch).count();
            let escaped_count = escaped.matches(&format!("\\{ch}")).count();
            assert_eq!(
                raw_count, escaped_count,
                "char {ch:?}: raw {raw_count} occurrences, escaped {escaped_count} in {escaped:?}"
            );
        }
    }

    #[test]
    fn escape_md_escapes_ampersand_to_block_numeric_entity_bypass() {
        // Without escaping `&`, a description containing `&#42;` would render
        // as `*` AFTER our escape pass, reintroducing the emphasis-injection
        // we explicitly defended against. Escaping the `&` (and `#`) breaks
        // the entity so it renders literally.
        let raw = "value &#42;suspicious&#42; here";
        let escaped = escape_md(raw);
        // Both `&` and `#` are escaped, so the entity becomes `\&\#42;`,
        // which Markdown renders as a literal `&#42;` instead of a `*`.
        assert!(escaped.contains(r"\&"), "got: {escaped}");
        assert!(escaped.contains(r"\#"), "got: {escaped}");
        // Defence-in-depth: the escape pass itself must never synthesise the
        // decoded form.
        assert!(!escaped.contains(" *suspicious"), "got: {escaped}");
    }

    #[test]
    fn summary_label_foreshadows_truncation() {
        // When the section is truncated, the <details> summary tells the
        // reader BEFORE they click that fewer rows than the count appear.
        assert_eq!(
            summary_label("Duplication", 160, 50),
            "Duplication (160, showing 50)"
        );
        // When the section fits, only the bare count is shown.
        assert_eq!(summary_label("Health", 12, 50), "Health (12)");
        // Exactly at the limit is not truncation.
        assert_eq!(summary_label("Dependencies", 50, 50), "Dependencies (50)");
    }

    #[test]
    fn escape_md_does_not_escape_block_only_markers() {
        // `.`, `-`, `+` are only special at the start of a block-level line
        // (ordered / unordered list markers). Table cells are inline; over-
        // escaping these produces visually noisy `\-` / `\.` in the cell.
        let raw = "fallow/test-only-dependency package.json:12";
        let escaped = escape_md(raw);
        assert!(!escaped.contains("\\-"), "should not escape `-`");
        assert!(!escaped.contains("\\."), "should not escape `.`");
        assert_eq!(escaped, raw);
    }

    #[test]
    fn escape_md_collapses_newlines_to_spaces() {
        // Table cells are single-line by construction; a literal newline in
        // a description would terminate the row and break the table.
        let raw = "first\nsecond\nthird";
        assert_eq!(escape_md(raw), "first second third");
    }

    #[test]
    fn escape_md_leaves_safe_chars_unchanged() {
        // Plain alphanumerics, spaces and quotes are legal inside a Markdown
        // table cell and pass through untouched.
        let raw = "Export 'helperFn' is never imported by other modules";
        assert_eq!(
            escape_md(raw),
            r"Export 'helperFn' is never imported by other modules"
        );
    }

    #[test]
    fn escape_md_is_not_idempotent() {
        // `escape_md` is a single-pass escaper: the backslashes it emits are
        // themselves escapable, so a second pass adds another layer. Callers
        // must apply it exactly once. Pinning both passes documents that
        // contract and catches accidental changes to the escape set.
        let raw = "code with `backticks` and *stars*";
        let once = escape_md(raw);
        assert_eq!(once, r"code with \`backticks\` and \*stars\*");
        let twice = escape_md(&once);
        assert_eq!(twice, r"code with \\\`backticks\\\` and \\\*stars\\\*");
    }
}