Skip to main content

tokmd_format/analysis/
html.rs

1//! Analysis HTML rendering.
2//!
3//! Single-responsibility HTML renderer for `AnalysisReceipt`.
4
5use time::OffsetDateTime;
6use time::macros::format_description;
7use tokmd_analysis_types::AnalysisReceipt;
8
9/// Render a self-contained HTML report for an analysis receipt.
10pub fn render(receipt: &AnalysisReceipt) -> String {
11    const TEMPLATE: &str = include_str!("templates/report.html");
12
13    let timestamp = timestamp_utc();
14    let metrics_cards = build_metrics_cards(receipt);
15    let table_rows = build_table_rows(receipt);
16    let report_json = build_report_json(receipt);
17
18    TEMPLATE
19        .replace("{{TIMESTAMP}}", &timestamp)
20        .replace("{{METRICS_CARDS}}", &metrics_cards)
21        .replace("{{TABLE_ROWS}}", &table_rows)
22        .replace("{{REPORT_JSON}}", &report_json)
23}
24
25fn timestamp_utc() -> String {
26    let format = format_description!("[year]-[month]-[day] [hour]:[minute]:[second] UTC");
27    OffsetDateTime::now_utc()
28        .format(&format)
29        .unwrap_or_else(|_| "1970-01-01 00:00:00 UTC".to_string())
30}
31
32fn build_metrics_cards(receipt: &AnalysisReceipt) -> String {
33    let mut cards = String::new();
34
35    if let Some(derived) = &receipt.derived {
36        let metrics = [
37            ("Files", derived.totals.files.to_string()),
38            ("Lines", format_number(derived.totals.lines)),
39            ("Code", format_number(derived.totals.code)),
40            ("Tokens", format_number(derived.totals.tokens)),
41            ("Doc%", format_pct(derived.doc_density.total.ratio)),
42        ];
43
44        for (label, value) in metrics {
45            cards.push_str(&format!(
46                r#"<div class="metric-card"><span class="value">{}</span><span class="label">{}</span></div>"#,
47                value, label
48            ));
49        }
50
51        if let Some(ctx) = &derived.context_window {
52            cards.push_str(&format!(
53                r#"<div class="metric-card"><span class="value">{}</span><span class="label">Context Fit</span></div>"#,
54                format_pct(ctx.pct)
55            ));
56        }
57    }
58
59    cards
60}
61
62fn build_table_rows(receipt: &AnalysisReceipt) -> String {
63    let mut rows = String::new();
64
65    if let Some(derived) = &receipt.derived {
66        for row in derived.top.largest_lines.iter().take(100) {
67            rows.push_str(&format!(
68                r#"<tr><td class="path" data-path="{path}">{path}</td><td data-module="{module}">{module}</td><td data-lang="{lang}"><span class="lang-badge">{lang}</span></td><td class="num" data-lines="{lines}">{lines_fmt}</td><td class="num" data-code="{code}">{code_fmt}</td><td class="num" data-tokens="{tokens}">{tokens_fmt}</td><td class="num" data-bytes="{bytes}">{bytes_fmt}</td></tr>"#,
69                path = escape_html(&row.path),
70                module = escape_html(&row.module),
71                lang = escape_html(&row.lang),
72                lines = row.lines,
73                lines_fmt = format_number(row.lines),
74                code = row.code,
75                code_fmt = format_number(row.code),
76                tokens = row.tokens,
77                tokens_fmt = format_number(row.tokens),
78                bytes = row.bytes,
79                bytes_fmt = format_number(row.bytes),
80            ));
81        }
82    }
83
84    rows
85}
86
87fn build_report_json(receipt: &AnalysisReceipt) -> String {
88    let mut files = Vec::new();
89
90    if let Some(derived) = &receipt.derived {
91        for row in &derived.top.largest_lines {
92            files.push(serde_json::json!({
93                "path": row.path,
94                "module": row.module,
95                "lang": row.lang,
96                "code": row.code,
97                "lines": row.lines,
98                "tokens": row.tokens,
99            }));
100        }
101    }
102
103    // Escape < and > to prevent </script> breakout XSS attacks.
104    // JSON remains valid because \u003c and \u003e are valid JSON string escapes.
105    serde_json::json!({ "files": files })
106        .to_string()
107        .replace('<', "\\u003c")
108        .replace('>', "\\u003e")
109}
110
/// Format a count compactly for display.
///
/// Values below 1,000 are printed as-is; larger values are abbreviated with
/// one decimal place and a `K` (thousands) or `M` (millions) suffix.
///
/// The `M` tier begins slightly below one million: a value such as 999,999
/// rounds to `1000.0` when expressed in thousands with one decimal, so it is
/// promoted and rendered as `1.0M` rather than `1000.0K`.
fn format_number(n: usize) -> String {
    // From here up, the one-decimal thousands form would reach "1000.0".
    const MILLIONS_FROM: usize = 999_950;

    if n >= MILLIONS_FROM {
        format!("{:.1}M", n as f64 / 1_000_000.0)
    } else if n >= 1_000 {
        format!("{:.1}K", n as f64 / 1_000.0)
    } else {
        n.to_string()
    }
}
120
/// Render a fractional ratio (e.g. `0.25`) as a percentage with one decimal
/// place and a trailing `%` (e.g. `25.0%`).
fn format_pct(ratio: f64) -> String {
    let percent = ratio * 100.0;
    format!("{:.1}%", percent)
}
124
/// Escape the five HTML-significant characters (`&`, `<`, `>`, `"`, `'`) so
/// the value can be placed in element text or a quoted attribute.
///
/// All other characters are copied through unchanged.
fn escape_html(value: &str) -> String {
    let mut escaped = String::with_capacity(value.len());

    for ch in value.chars() {
        match ch {
            '&' => escaped.push_str("&amp;"),
            '<' => escaped.push_str("&lt;"),
            '>' => escaped.push_str("&gt;"),
            '"' => escaped.push_str("&quot;"),
            '\'' => escaped.push_str("&#x27;"),
            other => escaped.push(other),
        }
    }

    escaped
}
133
#[cfg(test)]
mod tests {
    use super::*;
    use tokmd_analysis_types::*;

    /// Receipt with no derived report and every optional section unset.
    fn minimal_receipt() -> AnalysisReceipt {
        let tool = tokmd_types::ToolInfo {
            name: String::from("tokmd"),
            version: String::from("0.0.0"),
        };

        let source = AnalysisSource {
            inputs: vec![String::from("test")],
            export_path: None,
            base_receipt_path: None,
            export_schema_version: None,
            export_generated_at_ms: None,
            base_signature: None,
            module_roots: Vec::new(),
            module_depth: 1,
            children: String::from("collapse"),
        };

        let args = AnalysisArgsMeta {
            preset: String::from("receipt"),
            format: String::from("html"),
            window_tokens: None,
            git: None,
            max_files: None,
            max_bytes: None,
            max_commits: None,
            max_commit_files: None,
            max_file_bytes: None,
            import_granularity: String::from("module"),
        };

        AnalysisReceipt {
            schema_version: 2,
            generated_at_ms: 0,
            tool,
            mode: String::from("analysis"),
            status: tokmd_types::ScanStatus::Complete,
            warnings: Vec::new(),
            source,
            args,
            archetype: None,
            topics: None,
            entropy: None,
            predictive_churn: None,
            corporate_fingerprint: None,
            license: None,
            derived: None,
            assets: None,
            deps: None,
            git: None,
            imports: None,
            dup: None,
            complexity: None,
            api_surface: None,
            effort: None,
            fun: None,
        }
    }

    /// Minimal receipt carrying the given derived report.
    fn receipt_with(derived: DerivedReport) -> AnalysisReceipt {
        AnalysisReceipt {
            derived: Some(derived),
            ..minimal_receipt()
        }
    }

    /// The single `src/lib.rs` row shared by the max-file and top-offender fixtures.
    fn lib_rs_row() -> FileStatRow {
        FileStatRow {
            path: String::from("src/lib.rs"),
            module: String::from("src"),
            lang: String::from("Rust"),
            code: 500,
            comments: 100,
            blanks: 50,
            lines: 650,
            bytes: 25000,
            tokens: 1250,
            doc_pct: Some(0.167),
            bytes_per_line: Some(38.46),
            depth: 1,
        }
    }

    /// Fully populated derived report with one file in `top.largest_lines`.
    fn sample_derived() -> DerivedReport {
        let totals = DerivedTotals {
            files: 10,
            code: 1000,
            comments: 200,
            blanks: 100,
            lines: 1300,
            bytes: 50000,
            tokens: 2500,
        };

        let doc_density = RatioReport {
            total: RatioRow {
                key: String::from("total"),
                numerator: 200,
                denominator: 1200,
                ratio: 0.1667,
            },
            by_lang: Vec::new(),
            by_module: Vec::new(),
        };

        let whitespace = RatioReport {
            total: RatioRow {
                key: String::from("total"),
                numerator: 100,
                denominator: 1300,
                ratio: 0.0769,
            },
            by_lang: Vec::new(),
            by_module: Vec::new(),
        };

        let verbosity = RateReport {
            total: RateRow {
                key: String::from("total"),
                numerator: 50000,
                denominator: 1300,
                rate: 38.46,
            },
            by_lang: Vec::new(),
            by_module: Vec::new(),
        };

        let max_file = MaxFileReport {
            overall: lib_rs_row(),
            by_lang: Vec::new(),
            by_module: Vec::new(),
        };

        let nesting = NestingReport {
            max: 3,
            avg: 1.5,
            by_module: Vec::new(),
        };

        let test_density = TestDensityReport {
            test_lines: 200,
            prod_lines: 1000,
            test_files: 5,
            prod_files: 5,
            ratio: 0.2,
        };

        let boilerplate = BoilerplateReport {
            infra_lines: 100,
            logic_lines: 1100,
            ratio: 0.083,
            infra_langs: vec![String::from("TOML")],
        };

        let polyglot = PolyglotReport {
            lang_count: 2,
            entropy: 0.5,
            dominant_lang: String::from("Rust"),
            dominant_lines: 1000,
            dominant_pct: 0.833,
        };

        let distribution = DistributionReport {
            count: 10,
            min: 50,
            max: 650,
            mean: 130.0,
            median: 100.0,
            p90: 400.0,
            p99: 650.0,
            gini: 0.3,
        };

        let histogram = vec![HistogramBucket {
            label: String::from("Small"),
            min: 0,
            max: Some(100),
            files: 5,
            pct: 0.5,
        }];

        let top = TopOffenders {
            largest_lines: vec![lib_rs_row()],
            largest_tokens: Vec::new(),
            largest_bytes: Vec::new(),
            least_documented: Vec::new(),
            most_dense: Vec::new(),
        };

        let reading_time = ReadingTimeReport {
            minutes: 65.0,
            lines_per_minute: 20,
            basis_lines: 1300,
        };

        let context_window = Some(ContextWindowReport {
            window_tokens: 100000,
            total_tokens: 2500,
            pct: 0.025,
            fits: true,
        });

        let cocomo = Some(CocomoReport {
            mode: String::from("organic"),
            kloc: 1.0,
            effort_pm: 2.4,
            duration_months: 2.5,
            staff: 1.0,
            a: 2.4,
            b: 1.05,
            c: 2.5,
            d: 0.38,
        });

        let todo = Some(TodoReport {
            total: 5,
            density_per_kloc: 5.0,
            tags: vec![TodoTagRow {
                tag: String::from("TODO"),
                count: 5,
            }],
        });

        let integrity = IntegrityReport {
            algo: String::from("blake3"),
            hash: String::from("abc123"),
            entries: 10,
        };

        DerivedReport {
            totals,
            doc_density,
            whitespace,
            verbosity,
            max_file,
            lang_purity: LangPurityReport { rows: Vec::new() },
            nesting,
            test_density,
            boilerplate,
            polyglot,
            distribution,
            histogram,
            top,
            tree: Some(String::from("test-tree")),
            reading_time,
            context_window,
            cocomo,
            todo,
            integrity,
        }
    }

    #[test]
    fn format_number_thresholds() {
        let cases = [
            (500, "500"),
            (1_000, "1.0K"),
            (1_500, "1.5K"),
            (1_000_000, "1.0M"),
            (2_500_000, "2.5M"),
        ];

        for (input, expected) in cases {
            assert_eq!(format_number(input), expected);
        }
    }

    #[test]
    fn escape_html_encodes_special_chars() {
        let cases = [
            ("hello", "hello"),
            ("<script>", "&lt;script&gt;"),
            ("a & b", "a &amp; b"),
            ("\"quoted\"", "&quot;quoted&quot;"),
            ("it's", "it&#x27;s"),
            (
                "<a href=\"test\">&'",
                "&lt;a href=&quot;test&quot;&gt;&amp;&#x27;",
            ),
        ];

        for (raw, expected) in cases {
            assert_eq!(escape_html(raw), expected);
        }
    }

    #[test]
    fn timestamp_has_expected_shape() {
        let stamp = timestamp_utc();
        assert!(stamp.contains("UTC"));
        assert!(stamp.len() > 10);
    }

    #[test]
    fn metrics_cards_empty_without_derived() {
        let cards = build_metrics_cards(&minimal_receipt());
        assert!(cards.is_empty());
    }

    #[test]
    fn metrics_cards_include_context_fit_when_available() {
        let receipt = receipt_with(sample_derived());
        let cards = build_metrics_cards(&receipt);

        assert!(cards.contains("class=\"metric-card\""));
        assert!(cards.contains("Context Fit"));
    }

    #[test]
    fn table_rows_are_html_escaped() {
        let mut derived = sample_derived();
        {
            // Each field exercises a different escaped character class.
            let row = &mut derived.top.largest_lines[0];
            row.path = String::from("src/<script>.rs");
            row.module = String::from("mod&name");
            row.lang = String::from("Ru\"st");
        }

        let rows = build_table_rows(&receipt_with(derived));

        assert!(rows.contains("src/&lt;script&gt;.rs"));
        assert!(rows.contains("mod&amp;name"));
        assert!(rows.contains("Ru&quot;st"));
    }

    #[test]
    fn report_json_escapes_angle_brackets() {
        let mut derived = sample_derived();
        derived.top.largest_lines[0].path = String::from("</script><script>alert(1)</script>");

        let json = build_report_json(&receipt_with(derived));

        assert!(
            json.contains("\\u003c/script\\u003e\\u003cscript\\u003ealert(1)\\u003c/script\\u003e")
        );
        assert!(!json.contains('<'));
        assert!(!json.contains('>'));
    }

    #[test]
    fn report_json_without_derived_is_empty_files_array() {
        let json = build_report_json(&minimal_receipt());
        assert_eq!(json, "{\"files\":[]}");
    }

    #[test]
    fn render_inlines_template_content() {
        let html = render(&receipt_with(sample_derived()));

        assert!(html.contains("<!DOCTYPE html>"));
        assert!(html.contains("metric-card"));
        assert!(html.contains("src/lib.rs"));
        assert!(html.contains("const REPORT_DATA ="));
    }
}