Skip to main content

fallow_cli/report/ci/
pr_comment.rs

1use std::fmt::Write as _;
2use std::process::ExitCode;
3use std::sync::OnceLock;
4
5use serde_json::Value;
6
/// Process-wide workspace label used to suffix the sticky-comment marker.
///
/// `set_workspace_marker_from_list` is the writer in this file (it is meant
/// to be driven by `main()` once `--workspace <name>` has been parsed) and
/// `sticky_marker_id` is the reader. Giving each workspace its own marker
/// suffix keeps parallel per-workspace jobs from overwriting each other's
/// sticky comment on the same PR/MR.
///
/// A `OnceLock` is used rather than an environment variable: the first
/// successful `set` wins, and every later `get` observes that value.
static WORKSPACE_MARKER: OnceLock<String> = OnceLock::new();
16
17/// Set the workspace marker from a `--workspace` selection list.
18///
19/// Single workspace -> the name itself, sanitised for marker grammar.
20/// N>1 workspaces -> a stable 6-char hex hash of the sorted, comma-joined
21/// list, prefixed with `w-`. Sort + join is deterministic so the same
22/// selection produces the same suffix across runs; two jobs with disjoint
23/// selections get distinct markers and don't race.
24#[allow(
25    dead_code,
26    reason = "called from main.rs bin target; lib target sees no caller"
27)]
28pub fn set_workspace_marker_from_list(values: &[String]) {
29    let trimmed: Vec<&str> = values
30        .iter()
31        .map(|value| value.trim())
32        .filter(|value| !value.is_empty())
33        .collect();
34    if trimmed.is_empty() {
35        return;
36    }
37    let marker = if let [single] = trimmed.as_slice() {
38        (*single).to_owned()
39    } else {
40        let mut sorted = trimmed.iter().map(|s| (*s).to_owned()).collect::<Vec<_>>();
41        sorted.sort();
42        let joined = sorted.join(",");
43        format!("w-{}", short_hex_hash(&joined))
44    };
45    let _ = WORKSPACE_MARKER.set(marker);
46}
47
/// Six lowercase hex digits taken from the low 24 bits of the 64-bit FNV-1a
/// hash of `value`.
///
/// The digest depends only on the input bytes, so it is stable across runs
/// and toolchains. 24 bits give roughly 16.7 million distinct suffixes,
/// which is short enough for a marker and wide enough that two real-world
/// workspace selections are unlikely to collide.
fn short_hex_hash(value: &str) -> String {
    const FNV_OFFSET_BASIS: u64 = 0xcbf2_9ce4_8422_2325;
    const FNV_PRIME: u64 = 0x0100_0000_01b3;
    const LOW_24_BITS: u64 = 0x00ff_ffff;

    // FNV-1a: xor the byte in first, then multiply by the prime.
    let digest = value.bytes().fold(FNV_OFFSET_BASIS, |acc, byte| {
        (acc ^ u64::from(byte)).wrapping_mul(FNV_PRIME)
    });
    format!("{:06x}", digest & LOW_24_BITS)
}
59
/// CI host for which the PR/MR comment is rendered.
#[derive(Debug, Clone, Copy, Eq, PartialEq)]
pub enum Provider {
    Github,
    Gitlab,
}

impl Provider {
    /// Human-readable provider name, in the provider's own capitalisation.
    #[must_use]
    pub const fn name(self) -> &'static str {
        match self {
            Provider::Gitlab => "GitLab",
            Provider::Github => "GitHub",
        }
    }
}
75
/// One finding, flattened from a CodeClimate report entry into the fields
/// the PR/MR comment renderer needs.
#[derive(Debug, Clone, Eq, PartialEq)]
pub struct CiIssue {
    /// Rule identifier (CodeClimate `check_name`).
    pub rule_id: String,
    /// Human-readable explanation of the finding.
    pub description: String,
    /// Severity label (CodeClimate `severity`).
    pub severity: String,
    /// File the finding is anchored to (CodeClimate `location.path`).
    pub path: String,
    /// Line the finding is anchored to (CodeClimate `location.lines.begin`).
    pub line: u64,
    /// Fingerprint string from the report; used as the final sort key.
    pub fingerprint: String,
}
85
86#[must_use]
87pub fn issues_from_codeclimate(value: &Value) -> Vec<CiIssue> {
88    let mut issues = value
89        .as_array()
90        .into_iter()
91        .flatten()
92        .filter_map(issue_from_codeclimate)
93        .collect::<Vec<_>>();
94    issues
95        .sort_by(|a, b| (&a.path, a.line, &a.fingerprint).cmp(&(&b.path, b.line, &b.fingerprint)));
96    issues
97}
98
99fn issue_from_codeclimate(value: &Value) -> Option<CiIssue> {
100    let path = value.pointer("/location/path")?.as_str()?.to_string();
101    let line = value
102        .pointer("/location/lines/begin")
103        .and_then(Value::as_u64)
104        .unwrap_or(1);
105    Some(CiIssue {
106        rule_id: value
107            .get("check_name")
108            .and_then(Value::as_str)
109            .unwrap_or("fallow/finding")
110            .to_string(),
111        description: value
112            .get("description")
113            .and_then(Value::as_str)
114            .unwrap_or("Fallow finding")
115            .to_string(),
116        severity: value
117            .get("severity")
118            .and_then(Value::as_str)
119            .unwrap_or("minor")
120            .to_string(),
121        fingerprint: value
122            .get("fingerprint")
123            .and_then(Value::as_str)
124            .unwrap_or("")
125            .to_string(),
126        path,
127        line,
128    })
129}
130
131#[must_use]
132#[expect(clippy::expect_used, reason = "formatting into String is infallible")]
133pub fn render_pr_comment(command: &str, provider: Provider, issues: &[CiIssue]) -> String {
134    let marker_id = sticky_marker_id();
135    let marker = format!("<!-- fallow-id: {marker_id} -->");
136    let max = max_comments();
137    let title = command_title(command);
138    let count = issues.len();
139    let noun = if count == 1 { "finding" } else { "findings" };
140
141    let mut out = String::new();
142    out.push_str(&marker);
143    out.push('\n');
144    write!(&mut out, "### Fallow {title}\n\n").expect("write to string");
145    if count == 0 {
146        writeln!(
147            &mut out,
148            "No {provider} PR/MR findings.",
149            provider = provider.name()
150        )
151        .expect("write to string");
152    } else {
153        write!(&mut out, "Found **{count}** {noun}.\n\n").expect("write to string");
154        let groups = group_by_category(issues);
155        if groups.len() == 1 {
156            render_findings_table(&mut out, issues, max, "Details");
157        } else {
158            for (category, group_issues) in &groups {
159                let summary_label = summary_label(category, group_issues.len(), max);
160                render_findings_table(&mut out, group_issues, max, &summary_label);
161            }
162        }
163    }
164    out.push_str("\nGenerated by fallow.");
165    out
166}
167
/// Label for one category's `<details>` summary.
///
/// Normally just `Category (total)`. When the section will be cut off at
/// `max` rows, the label says so up front (`Duplication (160, showing 50)`)
/// so a reviewer who expands it is not surprised by missing rows.
fn summary_label(category: &str, total: usize, max: usize) -> String {
    let counts = if total > max {
        format!("{total}, showing {max}")
    } else {
        total.to_string()
    };
    format!("{category} ({counts})")
}
180
181#[expect(clippy::expect_used, reason = "formatting into String is infallible")]
182fn render_findings_table(out: &mut String, issues: &[CiIssue], max: usize, summary: &str) {
183    writeln!(out, "<details>\n<summary>{summary}</summary>\n").expect("write to string");
184    out.push_str("| Severity | Rule | Location | Description |\n");
185    out.push_str("| --- | --- | --- | --- |\n");
186    for issue in issues.iter().take(max) {
187        writeln!(
188            out,
189            "| {} | `{}` | `{}`:{} | {} |",
190            escape_md(&issue.severity),
191            escape_md(&issue.rule_id),
192            escape_md(&issue.path),
193            issue.line,
194            escape_md(&issue.description),
195        )
196        .expect("write to string");
197    }
198    if issues.len() > max {
199        writeln!(
200            out,
201            "\nShowing {max} of {} findings. Run fallow locally or inspect the CI output for the full report.",
202            issues.len(),
203        )
204        .expect("write to string");
205    }
206    out.push_str("\n</details>\n\n");
207}
208
209/// Map a fallow rule id to its category for sticky-comment grouping.
210///
211/// Single source of truth lives on `RuleDef::category` in `explain.rs`. This
212/// helper does the lookup so callers don't need to know about the registry;
213/// the look-up-then-fallback shape also keeps the renderer working for
214/// rules a downstream consumer added without registering (rare; produces
215/// the conservative "Dead code" default).
216#[must_use]
217pub fn category_for_rule(rule_id: &str) -> &'static str {
218    crate::explain::rule_by_id(rule_id).map_or("Dead code", |def| def.category)
219}
220
/// Rules that report project-wide configuration state (dependency hygiene,
/// catalog state, override hygiene) instead of a problem on a changed
/// source line.
///
/// Such findings anchor at fixed lines in `package.json` /
/// `pnpm-workspace.yaml`. The change that triggers them rarely touches that
/// exact line, so a purely line-based diff filter would hide them from the
/// comment even though CI still fails because of them.
///
/// `filter_issues_for_summary` consults this list so the PR-comment body
/// explains every config-anchored reason for a CI failure. The
/// review-envelope path keeps its unconditional diff filter, since inline
/// review comments must anchor on diff lines.
const PROJECT_LEVEL_RULE_IDS: &[&str] = &[
    "fallow/unused-catalog-entry",
    "fallow/empty-catalog-group",
    "fallow/unresolved-catalog-reference",
    "fallow/unused-dependency-override",
    "fallow/misconfigured-dependency-override",
    "fallow/unused-dependency",
    "fallow/unused-dev-dependency",
    "fallow/unused-optional-dependency",
    "fallow/type-only-dependency",
    "fallow/test-only-dependency",
];

/// Whether `rule_id` is one of the `PROJECT_LEVEL_RULE_IDS`, i.e. a rule
/// whose findings should bypass diff-aware filtering in the PR-comment
/// summary.
#[must_use]
pub fn is_project_level_rule(rule_id: &str) -> bool {
    PROJECT_LEVEL_RULE_IDS
        .iter()
        .any(|known| *known == rule_id)
}
254
255/// Stable category ordering for the sticky comment. Reviewers see categories
256/// in the same order across PRs / runs, which matters for muscle memory.
257const CATEGORY_ORDER: [&str; 6] = [
258    "Dead code",
259    "Dependencies",
260    "Duplication",
261    "Health",
262    "Architecture",
263    "Suppressions",
264];
265
266fn group_by_category(issues: &[CiIssue]) -> Vec<(&'static str, Vec<CiIssue>)> {
267    let mut buckets: std::collections::BTreeMap<&'static str, Vec<CiIssue>> =
268        std::collections::BTreeMap::new();
269    for issue in issues {
270        let category = category_for_rule(&issue.rule_id);
271        buckets.entry(category).or_default().push(issue.clone());
272    }
273    let mut ordered: Vec<(&'static str, Vec<CiIssue>)> = Vec::with_capacity(buckets.len());
274    for category in CATEGORY_ORDER {
275        if let Some(items) = buckets.remove(category) {
276            ordered.push((category, items));
277        }
278    }
279    for (category, items) in buckets {
280        ordered.push((category, items));
281    }
282    ordered
283}
284
/// Maximum number of rows rendered per findings table.
///
/// Taken from the `FALLOW_MAX_COMMENTS` environment variable; when the
/// variable is unset, not valid Unicode, or does not parse as `usize`, the
/// limit is 50.
fn max_comments() -> usize {
    const DEFAULT_LIMIT: usize = 50;
    match std::env::var("FALLOW_MAX_COMMENTS") {
        Ok(raw) => raw.parse().unwrap_or(DEFAULT_LIMIT),
        Err(_) => DEFAULT_LIMIT,
    }
}
291
292/// Compute the sticky-comment marker id. Precedence (highest first):
293///
294/// 1. `FALLOW_COMMENT_ID` set by the user explicitly: use as-is.
295/// 2. `WORKSPACE_MARKER` populated by `main()` from `--workspace <name>`:
296///    suffix the default to avoid colliding with a sibling per-workspace
297///    job's sticky on the same PR/MR.
298/// 3. Plain `fallow-results`.
299///
300/// The collision case (2) is the common monorepo shape: parallel jobs each
301/// run fallow scoped to one workspace package and post their own sticky.
302/// Without a per-workspace suffix every job edits the same marker, racing
303/// each other's bodies on every CI re-run.
304fn sticky_marker_id() -> String {
305    if let Ok(value) = std::env::var("FALLOW_COMMENT_ID")
306        && !value.trim().is_empty()
307    {
308        return value;
309    }
310    let suffix = WORKSPACE_MARKER
311        .get()
312        .map(|value| value.trim())
313        .filter(|value| !value.is_empty())
314        .map(sanitize_marker_segment);
315    match suffix {
316        Some(workspace) => format!("fallow-results-{workspace}"),
317        None => "fallow-results".to_owned(),
318    }
319}
320
/// Make `value` safe to embed in the `<!-- fallow-id: <id> -->` marker.
///
/// ASCII letters, digits, `-`, `_` and `.` are kept; every other character
/// (including `<`, `>` and whitespace, which would break the HTML comment
/// or split the id when it is scanned for) is replaced by `-`. Leading and
/// trailing dashes are then removed.
fn sanitize_marker_segment(value: &str) -> String {
    let mut cleaned = String::with_capacity(value.len());
    for ch in value.chars() {
        let keep = ch.is_ascii_alphanumeric() || matches!(ch, '-' | '_' | '.');
        cleaned.push(if keep { ch } else { '-' });
    }
    cleaned.trim_matches('-').to_owned()
}
339
340#[must_use]
341pub fn print_pr_comment(command: &str, provider: Provider, codeclimate: &Value) -> ExitCode {
342    let issues =
343        super::diff_filter::filter_issues_for_summary(issues_from_codeclimate(codeclimate));
344    println!("{}", render_pr_comment(command, provider, &issues));
345    ExitCode::SUCCESS
346}
347
/// Heading text for the report produced by `command`.
///
/// `check` is treated like `dead-code`, and the empty string like
/// `combined`. Any command not listed gets the generic `report`.
#[must_use]
pub fn command_title(command: &str) -> &'static str {
    match command {
        "" | "combined" => "combined report",
        "audit" => "audit report",
        "health" => "health report",
        "dupes" => "duplication report",
        "check" | "dead-code" => "dead-code report",
        _other => "report",
    }
}
359
/// Escape a string for inclusion in a Markdown table cell.
///
/// Table cells render through GitHub-Flavored Markdown and GitLab Flavored
/// Markdown as inline content, so cell-internal markers can flip the cell to
/// emphasis, link, image, code, HTML, or strikethrough. Line endings
/// collapse to a single space because a literal line ending terminates the
/// table row; this covers `\n`, `\r\n` and a bare `\r`, all of which
/// CommonMark treats as line endings. Leading and trailing whitespace is
/// trimmed from the result. The escape set covers every CommonMark inline
/// construct that can fire mid-cell:
///
/// - `\` (escape character itself)
/// - `` ` `` (inline code)
/// - `*` `_` (emphasis / strong)
/// - `[` `]` `(` `)` (link / image syntax)
/// - `!` (image when followed by `[`)
/// - `<` `>` (raw HTML / autolinks)
/// - `#` (cell rendered as heading when first character of the cell)
/// - `|` (table cell separator)
/// - `~` (strikethrough on GFM)
/// - `&` (HTML numeric / named entity decode: `&#42;` would otherwise
///   render as `*` after our escape and reintroduce the bypass)
///
/// Line-start markers (`.`, `-`, `+`, `1.`) are intentionally NOT escaped:
/// they are only meaningful at the start of a block-level line, and table
/// cells render as paragraph-equivalent inline content where these are inert.
/// Escaping them produces visually noisy output (`fallow/test\-only-dep`)
/// without correctness benefit.
#[must_use]
pub fn escape_md(value: &str) -> String {
    // Fold CRLF first so a Windows line ending becomes one space, then
    // handle any remaining lone CR or LF.
    let collapsed = value.replace("\r\n", " ").replace(['\r', '\n'], " ");
    let mut out = String::with_capacity(collapsed.len());
    for ch in collapsed.chars() {
        if matches!(
            ch,
            '\\' | '`'
                | '*'
                | '_'
                | '['
                | ']'
                | '('
                | ')'
                | '!'
                | '<'
                | '>'
                | '#'
                | '|'
                | '~'
                | '&'
        ) {
            out.push('\\');
        }
        out.push(ch);
    }
    out.trim().to_owned()
}
413
#[cfg(test)]
mod tests {
    use super::*;

    /// A well-formed CodeClimate entry becomes one issue carrying its path
    /// and starting line.
    #[test]
    fn extracts_issues_from_codeclimate() {
        let report = serde_json::json!([{
            "check_name": "fallow/unused-export",
            "description": "Export x is never imported",
            "severity": "minor",
            "fingerprint": "abc",
            "location": { "path": "src/a.ts", "lines": { "begin": 7 } }
        }]);
        let parsed = issues_from_codeclimate(&report);
        assert_eq!(parsed.len(), 1);
        let only = &parsed[0];
        assert_eq!(only.path, "src/a.ts");
        assert_eq!(only.line, 7);
    }

    /// With no issues the body still carries the default marker prefix and
    /// the provider-specific "no findings" line.
    #[test]
    fn sticky_marker_id_default_when_nothing_set() {
        let rendered = render_pr_comment("check", Provider::Github, &[]);
        assert!(rendered.contains("<!-- fallow-id: fallow-results"));
        assert!(rendered.contains("No GitHub PR/MR findings."));
    }

    #[test]
    fn short_hex_hash_is_deterministic_and_six_chars() {
        let first = short_hex_hash("api,worker");
        let second = short_hex_hash("api,worker");
        let other = short_hex_hash("admin,web");
        assert_eq!(first.len(), 6);
        assert_eq!(first, second);
        assert_ne!(first, other);
    }

    #[test]
    fn sanitize_marker_segment_collapses_unsafe_chars_to_dashes() {
        let cases = [
            ("@fallow/runtime", "fallow-runtime"),
            ("packages/web ui", "packages-web-ui"),
            ("plain", "plain"),
            ("--leading-trailing--", "leading-trailing"),
        ];
        for (input, expected) in cases {
            assert_eq!(sanitize_marker_segment(input), expected);
        }
    }

    /// Each inline special must be preceded by a backslash exactly as many
    /// times as it occurs in the raw text.
    #[test]
    fn escape_md_escapes_inline_commonmark_specials() {
        let raw = "foo*bar_baz [a](u) `c` <h> #x !i ~s | p";
        let escaped = escape_md(raw);
        let specials = [
            '*', '_', '[', ']', '(', ')', '`', '<', '>', '#', '!', '~', '|',
        ];
        for ch in specials {
            let needle = format!("\\{ch}");
            let raw_count = raw.chars().filter(|candidate| *candidate == ch).count();
            let escaped_count = escaped.matches(needle.as_str()).count();
            assert_eq!(
                raw_count, escaped_count,
                "char {ch:?}: raw {raw_count} occurrences, escaped {escaped_count} in {escaped:?}"
            );
        }
    }

    /// `&#42;` must not survive as an entity that decodes to `*`.
    #[test]
    fn escape_md_escapes_ampersand_to_block_numeric_entity_bypass() {
        let escaped = escape_md("value &#42;suspicious&#42; here");
        for needle in [r"\&", r"\#"] {
            assert!(escaped.contains(needle), "got: {escaped}");
        }
        assert!(!escaped.contains(" *suspicious"), "got: {escaped}");
    }

    #[test]
    fn summary_label_foreshadows_truncation() {
        let cases = [
            ("Duplication", 160, 50, "Duplication (160, showing 50)"),
            ("Health", 12, 50, "Health (12)"),
            ("Dependencies", 50, 50, "Dependencies (50)"),
        ];
        for (category, total, max, expected) in cases {
            assert_eq!(summary_label(category, total, max), expected);
        }
    }

    #[test]
    fn escape_md_does_not_escape_block_only_markers() {
        let raw = "fallow/test-only-dependency package.json:12";
        let escaped = escape_md(raw);
        assert!(!escaped.contains("\\-"), "should not escape `-`");
        assert!(!escaped.contains("\\."), "should not escape `.`");
        assert_eq!(escaped, raw);
    }

    #[test]
    fn escape_md_collapses_newlines_to_spaces() {
        let collapsed = escape_md("first\nsecond\nthird");
        assert_eq!(collapsed, "first second third");
    }

    #[test]
    fn escape_md_leaves_safe_chars_unchanged() {
        let raw = "Export 'helperFn' is never imported by other modules";
        let escaped = escape_md(raw);
        assert_eq!(
            escaped,
            r"Export 'helperFn' is never imported by other modules"
        );
    }

    /// Every listed id is project-level; source-anchored rules are not.
    #[test]
    fn is_project_level_rule_covers_config_anchored_dependency_findings() {
        for &rule_id in PROJECT_LEVEL_RULE_IDS {
            assert!(
                is_project_level_rule(rule_id),
                "{rule_id} must be project-level"
            );
        }

        let source_anchored = [
            "fallow/unused-file",
            "fallow/unused-export",
            "fallow/unused-type",
            "fallow/unused-enum-member",
            "fallow/unused-class-member",
            "fallow/unresolved-import",
            "fallow/unlisted-dependency",
            "fallow/duplicate-export",
            "fallow/circular-dependency",
            "fallow/re-export-cycle",
            "fallow/boundary-violation",
            "fallow/stale-suppression",
            "fallow/private-type-leak",
            "fallow/high-complexity",
            "fallow/high-crap-score",
        ];
        for rule_id in source_anchored {
            assert!(
                !is_project_level_rule(rule_id),
                "{rule_id} must NOT be project-level"
            );
        }
    }

    /// Guards against a project-level id drifting out of the rule registry.
    #[test]
    fn project_level_rule_ids_each_register_in_explain_registry() {
        for &rule_id in PROJECT_LEVEL_RULE_IDS {
            let registered = crate::explain::rule_by_id(rule_id).is_some();
            assert!(
                registered,
                "{rule_id} listed in PROJECT_LEVEL_RULE_IDS but not in explain registry"
            );
        }
    }

    /// Escaping already-escaped text escapes the backslashes again.
    #[test]
    fn escape_md_double_apply_is_safe() {
        let twice = escape_md(&escape_md("code with `backticks` and *stars*"));
        assert!(twice.contains(r"\\"));
    }
}