// Source file: fallow_cli/report/ci/pr_comment.rs

use std::fmt::Write as _;
use std::process::ExitCode;
use std::sync::OnceLock;

use serde_json::Value;
6
7/// Workspace name, set once by `main()` when the binary is invoked with
8/// `--workspace <name>`. Read by `sticky_marker_id` to auto-suffix the
9/// sticky-comment marker per workspace, which keeps parallel per-workspace
10/// jobs from racing each other's sticky body on the same PR/MR.
11///
12/// `OnceLock` gives us safe cross-function read-after-set without env-var
13/// indirection. Only main writes; readers always observe the post-CLI-parse
14/// state.
15static WORKSPACE_MARKER: OnceLock<String> = OnceLock::new();
16
17/// Set the workspace marker from a `--workspace` selection list.
18///
19/// Single workspace -> the name itself, sanitised for marker grammar.
20/// N>1 workspaces -> a stable 6-char hex hash of the sorted, comma-joined
21/// list, prefixed with `w-`. Sort + join is deterministic so the same
22/// selection produces the same suffix across runs; two jobs with disjoint
23/// selections get distinct markers and don't race.
24#[allow(
25    dead_code,
26    reason = "called from main.rs bin target; lib target sees no caller"
27)]
28pub fn set_workspace_marker_from_list(values: &[String]) {
29    let trimmed: Vec<&str> = values
30        .iter()
31        .map(|value| value.trim())
32        .filter(|value| !value.is_empty())
33        .collect();
34    if trimmed.is_empty() {
35        return;
36    }
37    let marker = if let [single] = trimmed.as_slice() {
38        (*single).to_owned()
39    } else {
40        let mut sorted = trimmed.iter().map(|s| (*s).to_owned()).collect::<Vec<_>>();
41        sorted.sort();
42        let joined = sorted.join(",");
43        format!("w-{}", short_hex_hash(&joined))
44    };
45    let _ = WORKSPACE_MARKER.set(marker);
46}
47
/// Hash `value` with 64-bit FNV-1a and render the low 24 bits as six
/// lowercase, zero-padded hex digits.
///
/// The digest depends only on the input bytes, so it is stable across runs
/// and Rust versions. Six hex digits keep the marker suffix short while
/// leaving about 16.7 million distinct values.
fn short_hex_hash(value: &str) -> String {
    const FNV_OFFSET_BASIS: u64 = 0xcbf2_9ce4_8422_2325;
    const FNV_PRIME: u64 = 0x0100_0000_01b3;
    const LOW_24_BITS: u64 = 0x00ff_ffff;

    // FNV-1a: xor the byte in first, then multiply by the prime.
    let digest = value.bytes().fold(FNV_OFFSET_BASIS, |state, byte| {
        (state ^ u64::from(byte)).wrapping_mul(FNV_PRIME)
    });
    format!("{:06x}", digest & LOW_24_BITS)
}
59
/// CI hosting provider the PR/MR comment is rendered for.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum Provider {
    /// GitHub.
    Github,
    /// GitLab.
    Gitlab,
}

impl Provider {
    /// Brand-cased display name, as interpolated into the rendered comment.
    #[must_use]
    pub const fn name(self) -> &'static str {
        match self {
            Provider::Gitlab => "GitLab",
            Provider::Github => "GitHub",
        }
    }
}
75
/// One finding, flattened from a CodeClimate-style issue object into the
/// fields the PR/MR comment renderer needs.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct CiIssue {
    /// Rule identifier, taken from the issue's `check_name`.
    pub rule_id: String,
    /// Human-readable explanation of the finding.
    pub description: String,
    /// Severity label, kept as the raw string from the report.
    pub severity: String,
    /// File the finding is anchored to (`location.path`).
    pub path: String,
    /// Line the finding is anchored to (`location.lines.begin`).
    pub line: u64,
    /// Fingerprint string from the report; also the final sort tie-breaker.
    pub fingerprint: String,
}
85
86#[must_use]
87pub fn issues_from_codeclimate(value: &Value) -> Vec<CiIssue> {
88    let mut issues = value
89        .as_array()
90        .into_iter()
91        .flatten()
92        .filter_map(issue_from_codeclimate)
93        .collect::<Vec<_>>();
94    issues
95        .sort_by(|a, b| (&a.path, a.line, &a.fingerprint).cmp(&(&b.path, b.line, &b.fingerprint)));
96    issues
97}
98
99fn issue_from_codeclimate(value: &Value) -> Option<CiIssue> {
100    let path = value.pointer("/location/path")?.as_str()?.to_string();
101    let line = value
102        .pointer("/location/lines/begin")
103        .and_then(Value::as_u64)
104        .unwrap_or(1);
105    Some(CiIssue {
106        rule_id: value
107            .get("check_name")
108            .and_then(Value::as_str)
109            .unwrap_or("fallow/finding")
110            .to_string(),
111        description: value
112            .get("description")
113            .and_then(Value::as_str)
114            .unwrap_or("Fallow finding")
115            .to_string(),
116        severity: value
117            .get("severity")
118            .and_then(Value::as_str)
119            .unwrap_or("minor")
120            .to_string(),
121        fingerprint: value
122            .get("fingerprint")
123            .and_then(Value::as_str)
124            .unwrap_or("")
125            .to_string(),
126        path,
127        line,
128    })
129}
130
131#[must_use]
132pub fn render_pr_comment(command: &str, provider: Provider, issues: &[CiIssue]) -> String {
133    let marker_id = sticky_marker_id();
134    let marker = format!("<!-- fallow-id: {marker_id} -->");
135    let max = max_comments();
136    let title = command_title(command);
137    let count = issues.len();
138    let noun = if count == 1 { "finding" } else { "findings" };
139
140    let mut out = String::new();
141    out.push_str(&marker);
142    out.push('\n');
143    write!(&mut out, "### Fallow {title}\n\n").expect("write to string");
144    if count == 0 {
145        writeln!(
146            &mut out,
147            "No {provider} PR/MR findings.",
148            provider = provider.name()
149        )
150        .expect("write to string");
151    } else {
152        write!(&mut out, "Found **{count}** {noun}.\n\n").expect("write to string");
153        let groups = group_by_category(issues);
154        // Single-category invocations (e.g. `fallow check --format pr-comment-github`)
155        // get the original flat-table shape. Combined / multi-category runs get
156        // one collapsible section per category so reviewers can fold by area.
157        if groups.len() == 1 {
158            render_findings_table(&mut out, issues, max, "Details");
159        } else {
160            for (category, group_issues) in &groups {
161                let summary_label = summary_label(category, group_issues.len(), max);
162                render_findings_table(&mut out, group_issues, max, &summary_label);
163            }
164        }
165    }
166    out.push_str("\nGenerated by fallow.");
167    out
168}
169
/// Build the `<details>` summary label for one category section.
///
/// A section with more findings than `max` announces the truncation up
/// front (`Duplication (160, showing 50)`), so a reviewer expanding it is not
/// surprised by missing rows. A section that fits shows only the count
/// (`Health (12)`).
fn summary_label(category: &str, total: usize, max: usize) -> String {
    let truncated = total > max;
    let counts = if truncated {
        format!("{total}, showing {max}")
    } else {
        total.to_string()
    };
    format!("{category} ({counts})")
}
182
183fn render_findings_table(out: &mut String, issues: &[CiIssue], max: usize, summary: &str) {
184    writeln!(out, "<details>\n<summary>{summary}</summary>\n").expect("write to string");
185    out.push_str("| Severity | Rule | Location | Description |\n");
186    out.push_str("| --- | --- | --- | --- |\n");
187    for issue in issues.iter().take(max) {
188        writeln!(
189            out,
190            "| {} | `{}` | `{}`:{} | {} |",
191            escape_md(&issue.severity),
192            escape_md(&issue.rule_id),
193            escape_md(&issue.path),
194            issue.line,
195            escape_md(&issue.description),
196        )
197        .expect("write to string");
198    }
199    if issues.len() > max {
200        writeln!(
201            out,
202            "\nShowing {max} of {} findings. Run fallow locally or inspect the CI output for the full report.",
203            issues.len(),
204        )
205        .expect("write to string");
206    }
207    out.push_str("\n</details>\n\n");
208}
209
210/// Map a fallow rule id to its category for sticky-comment grouping.
211///
212/// Single source of truth lives on `RuleDef::category` in `explain.rs`. This
213/// helper does the lookup so callers don't need to know about the registry;
214/// the look-up-then-fallback shape also keeps the renderer working for
215/// rules a downstream consumer added without registering (rare; produces
216/// the conservative "Dead code" default).
217#[must_use]
218pub fn category_for_rule(rule_id: &str) -> &'static str {
219    crate::explain::rule_by_id(rule_id).map_or("Dead code", |def| def.category)
220}
221
/// Rule ids whose findings describe project-wide config state (dependency
/// hygiene, catalog state, override hygiene) rather than a change to a
/// specific source line.
///
/// Such findings anchor at fixed lines inside `package.json` /
/// `pnpm-workspace.yaml`. The resolved-tree shifts that trigger them rarely
/// coincide with a diff on the anchored line, so a line-based diff filter
/// would silently hide them while CI still exits non-zero because of the
/// same finding.
///
/// `filter_issues_for_summary` consults this list so the PR-comment body
/// always explains config-anchored findings, matching the usual expectation
/// that `comment: true` produces a body covering every CI-failure reason.
/// The review-envelope path keeps the unconditional filter because inline
/// review comments must anchor on diff lines.
const PROJECT_LEVEL_RULE_IDS: &[&str] = &[
    "fallow/unused-catalog-entry",
    "fallow/empty-catalog-group",
    "fallow/unresolved-catalog-reference",
    "fallow/unused-dependency-override",
    "fallow/misconfigured-dependency-override",
    "fallow/unused-dependency",
    "fallow/unused-dev-dependency",
    "fallow/unused-optional-dependency",
    "fallow/type-only-dependency",
    "fallow/test-only-dependency",
];

/// Whether `rule_id` is listed in `PROJECT_LEVEL_RULE_IDS`, i.e. its findings
/// reflect project-wide config state and should bypass diff-aware filtering
/// in the typed PR-comment renderer. The match is exact and case-sensitive.
#[must_use]
pub fn is_project_level_rule(rule_id: &str) -> bool {
    PROJECT_LEVEL_RULE_IDS
        .iter()
        .any(|known| *known == rule_id)
}
255
256/// Stable category ordering for the sticky comment. Reviewers see categories
257/// in the same order across PRs / runs, which matters for muscle memory.
258const CATEGORY_ORDER: [&str; 6] = [
259    "Dead code",
260    "Dependencies",
261    "Duplication",
262    "Health",
263    "Architecture",
264    "Suppressions",
265];
266
267fn group_by_category(issues: &[CiIssue]) -> Vec<(&'static str, Vec<CiIssue>)> {
268    let mut buckets: std::collections::BTreeMap<&'static str, Vec<CiIssue>> =
269        std::collections::BTreeMap::new();
270    for issue in issues {
271        let category = category_for_rule(&issue.rule_id);
272        buckets.entry(category).or_default().push(issue.clone());
273    }
274    let mut ordered: Vec<(&'static str, Vec<CiIssue>)> = Vec::with_capacity(buckets.len());
275    // Emit known categories in the declared order first.
276    for category in CATEGORY_ORDER {
277        if let Some(items) = buckets.remove(category) {
278            ordered.push((category, items));
279        }
280    }
281    // Anything left over (future categories not yet ordered) goes after.
282    for (category, items) in buckets {
283        ordered.push((category, items));
284    }
285    ordered
286}
287
/// Maximum number of rows rendered per findings table.
///
/// Read from the `FALLOW_MAX_COMMENTS` environment variable. Surrounding
/// whitespace is ignored, so a value padded by YAML or shell quoting
/// (`" 25 "`) is honoured instead of silently falling back. An unset,
/// non-Unicode, or non-numeric value yields the default of 50.
fn max_comments() -> usize {
    const DEFAULT_MAX_COMMENTS: usize = 50;

    std::env::var("FALLOW_MAX_COMMENTS")
        .ok()
        .and_then(|raw| raw.trim().parse::<usize>().ok())
        .unwrap_or(DEFAULT_MAX_COMMENTS)
}
294
295/// Compute the sticky-comment marker id. Precedence (highest first):
296///
297/// 1. `FALLOW_COMMENT_ID` set by the user explicitly: use as-is.
298/// 2. `WORKSPACE_MARKER` populated by `main()` from `--workspace <name>`:
299///    suffix the default to avoid colliding with a sibling per-workspace
300///    job's sticky on the same PR/MR.
301/// 3. Plain `fallow-results`.
302///
303/// The collision case (2) is the common monorepo shape: parallel jobs each
304/// run fallow scoped to one workspace package and post their own sticky.
305/// Without a per-workspace suffix every job edits the same marker, racing
306/// each other's bodies on every CI re-run.
307fn sticky_marker_id() -> String {
308    if let Ok(value) = std::env::var("FALLOW_COMMENT_ID")
309        && !value.trim().is_empty()
310    {
311        return value;
312    }
313    let suffix = WORKSPACE_MARKER
314        .get()
315        .map(|value| value.trim())
316        .filter(|value| !value.is_empty())
317        .map(sanitize_marker_segment);
318    match suffix {
319        Some(workspace) => format!("fallow-results-{workspace}"),
320        None => "fallow-results".to_owned(),
321    }
322}
323
/// Reduce `value` to a segment that is safe inside the HTML-comment marker
/// `<!-- fallow-id: <id> -->`.
///
/// `<`, `>` and `--` are reserved by the HTML comment grammar, and
/// whitespace would split the id when a reader scans for it. Therefore:
///
/// - ASCII letters, digits, `_` and `.` are kept as they are;
/// - every other character, including `-` itself, counts as a dash;
/// - consecutive dashes collapse into one, so the result never contains
///   `--`;
/// - leading and trailing dashes are dropped.
///
/// The result may be empty, for example when `value` is `"@@"`.
fn sanitize_marker_segment(value: &str) -> String {
    let mut segment = String::with_capacity(value.len());
    for ch in value.chars() {
        if ch.is_ascii_alphanumeric() || matches!(ch, '_' | '.') {
            segment.push(ch);
        } else if !segment.is_empty() && !segment.ends_with('-') {
            // Emit a dash only as a separator: never first, never doubled.
            segment.push('-');
        }
    }
    // At most one separator can be left dangling at the end.
    if segment.ends_with('-') {
        segment.pop();
    }
    segment
}
342
343#[must_use]
344pub fn print_pr_comment(command: &str, provider: Provider, codeclimate: &Value) -> ExitCode {
345    let issues =
346        super::diff_filter::filter_issues_for_summary(issues_from_codeclimate(codeclimate));
347    println!("{}", render_pr_comment(command, provider, &issues));
348    ExitCode::SUCCESS
349}
350
/// Title fragment shown after `### Fallow` in the comment heading.
///
/// `check` is an alias of `dead-code`, and the empty command is treated as
/// `combined`. Any command not listed gets the generic `report`. Matching is
/// exact and case-sensitive.
#[must_use]
pub fn command_title(command: &str) -> &'static str {
    const TITLES: [(&str, &str); 7] = [
        ("dead-code", "dead-code report"),
        ("check", "dead-code report"),
        ("dupes", "duplication report"),
        ("health", "health report"),
        ("audit", "audit report"),
        ("", "combined report"),
        ("combined", "combined report"),
    ];

    TITLES
        .iter()
        .find(|(name, _)| *name == command)
        .map_or("report", |(_, title)| *title)
}
362
/// Escape a string for inclusion in a Markdown table cell.
///
/// Table cells render through GitHub-Flavored Markdown and GitLab Flavored
/// Markdown as inline content, so cell-internal markers can flip the cell to
/// emphasis, link, image, code, HTML, or strikethrough.
///
/// Line endings collapse to a single space each, because a literal line
/// ending terminates the table row. All three CommonMark line endings are
/// handled: `\n`, `\r\n` (treated as one ending) and a bare `\r`.
///
/// The following characters are prefixed with a backslash, covering every
/// CommonMark inline construct that can fire mid-cell:
///
/// - `\` (escape character itself)
/// - `` ` `` (inline code)
/// - `*` `_` (emphasis / strong)
/// - `[` `]` `(` `)` (link / image syntax)
/// - `!` (image when followed by `[`)
/// - `<` `>` (raw HTML / autolinks)
/// - `#` (cell rendered as heading when first character of the cell)
/// - `|` (table cell separator)
/// - `~` (strikethrough on GFM)
/// - `&` (HTML numeric / named entity decode: `&#42;` would otherwise
///   render as `*` after our escape and reintroduce the bypass)
///
/// Line-start markers (`.`, `-`, `+`, `1.`) are intentionally NOT escaped:
/// they are only meaningful at the start of a block-level line, and table
/// cells render as paragraph-equivalent inline content where these are inert.
/// Escaping them produces visually noisy output (`fallow/test\-only-dep`)
/// without correctness benefit.
///
/// Leading and trailing whitespace is trimmed from the result. The function
/// is not idempotent: escaping already-escaped text adds another layer of
/// backslashes, so call it exactly once per cell. It is meant for plain
/// inline cells only; backslash escapes are not processed inside code spans.
#[must_use]
pub fn escape_md(value: &str) -> String {
    let mut out = String::with_capacity(value.len());
    let mut chars = value.chars().peekable();
    while let Some(ch) = chars.next() {
        match ch {
            '\r' => {
                // CRLF is one line ending, so swallow the `\n` half to avoid
                // emitting two spaces for it.
                if chars.peek() == Some(&'\n') {
                    chars.next();
                }
                out.push(' ');
            }
            '\n' => out.push(' '),
            '\\' | '`' | '*' | '_' | '[' | ']' | '(' | ')' | '!' | '<' | '>' | '#' | '|'
            | '~' | '&' => {
                out.push('\\');
                out.push(ch);
            }
            _ => out.push(ch),
        }
    }
    out.trim().to_owned()
}
416
/// Unit tests for the PR/MR comment renderer and its helpers.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn extracts_issues_from_codeclimate() {
        let value = serde_json::json!([{
            "check_name": "fallow/unused-export",
            "description": "Export x is never imported",
            "severity": "minor",
            "fingerprint": "abc",
            "location": { "path": "src/a.ts", "lines": { "begin": 7 } }
        }]);
        let issues = issues_from_codeclimate(&value);
        assert_eq!(issues.len(), 1);
        assert_eq!(issues[0].path, "src/a.ts");
        assert_eq!(issues[0].line, 7);
    }

    #[test]
    fn sticky_marker_id_default_when_nothing_set() {
        // WORKSPACE_MARKER is a OnceLock that's set-once-per-process; tests
        // can't unset it, and test order is not guaranteed. To stay order-
        // independent this test only asserts the marker PREFIX, which holds
        // whether or not a workspace suffix follows. Suffix construction is
        // covered through sanitize_marker_segment separately.
        let body = render_pr_comment("check", Provider::Github, &[]);
        // The marker prefix is always `<!-- fallow-id: fallow-results`,
        // regardless of whether a workspace suffix follows.
        assert!(body.contains("<!-- fallow-id: fallow-results"));
        assert!(body.contains("No GitHub PR/MR findings."));
    }

    #[test]
    fn short_hex_hash_is_deterministic_and_six_chars() {
        let a = short_hex_hash("api,worker");
        assert_eq!(a.len(), 6);
        // Same input -> same hash across calls.
        assert_eq!(a, short_hex_hash("api,worker"));
        // Different input -> different hash (modulo collision; the
        // workspace-marker assertion is "monorepo with 2-10 distinct
        // workspaces should not race", which a 6-hex-char suffix
        // satisfies at ~1/16M collision rate).
        assert_ne!(a, short_hex_hash("admin,web"));
    }

    #[test]
    fn sanitize_marker_segment_collapses_unsafe_chars_to_dashes() {
        // `@`, `/`, spaces, and other special chars all become `-`.
        // Leading and trailing dashes are trimmed.
        assert_eq!(sanitize_marker_segment("@fallow/runtime"), "fallow-runtime");
        assert_eq!(
            sanitize_marker_segment("packages/web ui"),
            "packages-web-ui"
        );
        assert_eq!(sanitize_marker_segment("plain"), "plain");
        assert_eq!(
            sanitize_marker_segment("--leading-trailing--"),
            "leading-trailing"
        );
    }

    #[test]
    fn escape_md_escapes_inline_commonmark_specials() {
        // Inline-context CommonMark specials must escape: emphasis, links,
        // images, code, HTML, headings (when first char of cell), pipes,
        // strikethrough.
        let raw = "foo*bar_baz [a](u) `c` <h> #x !i ~s | p";
        let escaped = escape_md(raw);
        for ch in [
            '*', '_', '[', ']', '(', ')', '`', '<', '>', '#', '!', '~', '|',
        ] {
            let raw_count = raw.chars().filter(|c| c == &ch).count();
            let escaped_count = escaped.matches(&format!("\\{ch}")).count();
            assert_eq!(
                raw_count, escaped_count,
                "char {ch:?}: raw {raw_count} occurrences, escaped {escaped_count} in {escaped:?}"
            );
        }
    }

    #[test]
    fn escape_md_escapes_ampersand_to_block_numeric_entity_bypass() {
        // Without escaping `&`, a description containing `&#42;` would render
        // as `*` AFTER our escape pass, reintroducing the emphasis-injection
        // we explicitly defended against. Escaping the `&` (and `#`) breaks
        // the entity so it renders literally.
        let raw = "value &#42;suspicious&#42; here";
        let escaped = escape_md(raw);
        // Both `&` and `#` are escaped, so the entity becomes `\&\#42;`,
        // which Markdown renders as a literal `&#42;` instead of a `*`.
        assert!(escaped.contains(r"\&"), "got: {escaped}");
        assert!(escaped.contains(r"\#"), "got: {escaped}");
        // Pin the exact output: every entity must come out as `\&\#42;` and
        // nothing else may change. (escape_md never decodes entities, so a
        // "decoded text is absent" check could not fail and proved nothing.)
        assert_eq!(escaped, r"value \&\#42;suspicious\&\#42; here");
    }

    #[test]
    fn summary_label_foreshadows_truncation() {
        // When the section is truncated, the <details> summary tells the
        // reader BEFORE they click that fewer rows than the count appear.
        assert_eq!(
            summary_label("Duplication", 160, 50),
            "Duplication (160, showing 50)"
        );
        // When the section fits, the bare count reads as before.
        assert_eq!(summary_label("Health", 12, 50), "Health (12)");
        assert_eq!(summary_label("Dependencies", 50, 50), "Dependencies (50)");
    }

    #[test]
    fn escape_md_does_not_escape_block_only_markers() {
        // `.`, `-`, `+` are only special at the start of a block-level line
        // (ordered / unordered list markers). Table cells are inline; over-
        // escaping these produces visually noisy `\-` / `\.` in the cell.
        let raw = "fallow/test-only-dependency package.json:12";
        let escaped = escape_md(raw);
        assert!(!escaped.contains("\\-"), "should not escape `-`");
        assert!(!escaped.contains("\\."), "should not escape `.`");
        assert_eq!(escaped, raw);
    }

    #[test]
    fn escape_md_collapses_newlines_to_spaces() {
        // Table cells are single-line by construction; a literal newline in
        // a description would terminate the row and break the table.
        let raw = "first\nsecond\nthird";
        assert_eq!(escape_md(raw), "first second third");
    }

    #[test]
    fn escape_md_leaves_safe_chars_unchanged() {
        // Plain alphanumeric, spaces, slashes, colons, equals, quotes: all
        // legal inside a Markdown table cell.
        let raw = "Export 'helperFn' is never imported by other modules";
        assert_eq!(
            escape_md(raw),
            r"Export 'helperFn' is never imported by other modules"
        );
    }

    #[test]
    fn is_project_level_rule_covers_config_anchored_dependency_findings() {
        for rule_id in PROJECT_LEVEL_RULE_IDS {
            assert!(
                is_project_level_rule(rule_id),
                "{rule_id} must be project-level"
            );
        }
        // Per-source-file rules stay diff-filterable so the comment body
        // keeps focus on the lines a PR actually changed.
        for rule_id in [
            "fallow/unused-file",
            "fallow/unused-export",
            "fallow/unused-type",
            "fallow/unused-enum-member",
            "fallow/unused-class-member",
            "fallow/unresolved-import",
            "fallow/unlisted-dependency",
            "fallow/duplicate-export",
            "fallow/circular-dependency",
            "fallow/boundary-violation",
            "fallow/stale-suppression",
            "fallow/private-type-leak",
            "fallow/high-complexity",
            "fallow/high-crap-score",
        ] {
            assert!(
                !is_project_level_rule(rule_id),
                "{rule_id} must NOT be project-level"
            );
        }
    }

    #[test]
    fn project_level_rule_ids_each_register_in_explain_registry() {
        // Drift guard: every project-level id must resolve to a `RuleDef` so
        // the SARIF help URI, `_meta`, and sticky-comment category stay
        // consistent with the bypass list.
        for rule_id in PROJECT_LEVEL_RULE_IDS {
            assert!(
                crate::explain::rule_by_id(rule_id).is_some(),
                "{rule_id} listed in PROJECT_LEVEL_RULE_IDS but not in explain registry"
            );
        }
    }

    #[test]
    fn escape_md_double_apply_is_safe() {
        // escape_md is NOT idempotent: the backslashes added by the first
        // pass are themselves escaped by the second, so callers must escape
        // exactly once. "Safe" here means the second pass neither panics nor
        // drops text; it only adds one more, fully predictable escape layer.
        let raw = "code with `backticks` and *stars*";
        let once = escape_md(raw);
        assert_eq!(once, r"code with \`backticks\` and \*stars\*");
        let twice = escape_md(&once);
        // Each `\X` from the first pass becomes `\\` + `\X`.
        assert_eq!(twice, r"code with \\\`backticks\\\` and \\\*stars\\\*");
    }
}