1use sarif_rust::parser::SarifValidator;
17use sarif_rust::{Level, ResultBuilder, RunBuilder, SarifLogBuilder, ToolBuilder};
18use sarif_to_md_core::markdown::sarif::generator::SarifMarkdownGenerator;
19use sarif_to_md_core::markdown::MarkdownFormat;
20use sarif_to_md_core::ReportProcessorBuilder;
21
22use crate::validate::Validity;
23
/// Rule identifier registered on the SARIF tool and attached to every SARIF result.
const RULE_ID: &str = "invalid-utf8";
/// Tool name passed to the SARIF tool builder.
const TOOL_NAME: &str = "simdutf8-cli";
28
/// Report format selectable by the user; `Text` is the default.
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq, clap::ValueEnum)]
pub enum OutputFormat {
    /// Plain-text output (the default).
    #[default]
    Text,
    /// JSON output.
    Json,
    /// SARIF output.
    Sarif,
    /// Markdown output.
    Markdown,
}
42
/// The validation outcome for one labelled input.
#[derive(Clone, Debug)]
pub struct Finding {
    /// Name shown in reports; also used as the JSON `path` value and as the
    /// source of the SARIF location URI.
    pub label: String,
    /// Whether the input was valid UTF-8 and, if not, where validation failed.
    pub validity: Validity,
}
52
/// Errors raised while producing the SARIF or Markdown reports.
///
/// Each variant carries the underlying failure rendered as text.
#[derive(Debug, thiserror::Error)]
pub enum ReportError {
    /// Building, validating, or serializing the SARIF log failed.
    #[error("SARIF generation failed: {0}")]
    Sarif(String),
    /// Generating the Markdown failed, or the generated Markdown was rejected.
    #[error("Markdown generation failed: {0}")]
    Markdown(String),
}
63
64#[must_use]
69pub fn json_escape(input: &str) -> String {
70 let mut out = String::with_capacity(input.len());
71 for ch in input.chars() {
72 match ch {
73 '"' => out.push_str("\\\""),
74 '\\' => out.push_str("\\\\"),
75 '\n' => out.push_str("\\n"),
76 '\r' => out.push_str("\\r"),
77 '\t' => out.push_str("\\t"),
78 '\u{0008}' => out.push_str("\\b"),
79 '\u{000C}' => out.push_str("\\f"),
80 c if u32::from(c) < 0x20 => {
81 let byte = u8::try_from(u32::from(c)).unwrap_or(0);
83 out.push_str("\\u00");
84 out.push(hex_nibble(byte >> 4));
85 out.push(hex_nibble(byte & 0x0F));
86 },
87 c => out.push(c),
88 }
89 }
90 out
91}
92
/// Converts a value in `0..=15` to its lowercase hexadecimal digit.
///
/// Callers in this file always pass a 4-bit value; larger inputs are not
/// mapped to hex digits.
const fn hex_nibble(nibble: u8) -> char {
    let digit = if nibble <= 9 {
        b'0' + nibble
    } else {
        b'a' + nibble - 10
    };
    digit as char
}
100
101fn path_to_uri(path: &str) -> String {
109 let mut out = String::with_capacity(path.len());
110 for &byte in path.as_bytes() {
111 match byte {
112 b'A'..=b'Z' | b'a'..=b'z' | b'0'..=b'9' | b'-' | b'.' | b'_' | b'~' | b'/' => {
113 out.push(char::from(byte));
114 },
115 _ => {
116 out.push('%');
117 out.push(hex_nibble(byte >> 4).to_ascii_uppercase());
118 out.push(hex_nibble(byte & 0x0F).to_ascii_uppercase());
119 },
120 }
121 }
122 if out.is_empty() {
123 out.push('.');
124 }
125 out
126}
127
/// Describes the length of an invalid sequence in words.
///
/// `None` is reported as an incomplete sequence; `Some(n)` as a byte count,
/// with the singular form used for exactly one byte.
fn invalid_detail(error_len: Option<usize>) -> String {
    error_len.map_or_else(
        || "incomplete sequence".to_owned(),
        |count| {
            if count == 1 {
                "1 invalid byte".to_owned()
            } else {
                format!("{count} invalid bytes")
            }
        },
    )
}
136
137#[must_use]
139pub fn text_line(label: &str, validity: &Validity) -> String {
140 match validity {
141 Validity::Valid => format!("OK {label}"),
142 Validity::Invalid {
143 valid_up_to,
144 error_len,
145 } => {
146 let detail = invalid_detail(*error_len);
147 format!("FAIL {label}: invalid UTF-8 at byte {valid_up_to} ({detail})")
148 },
149 }
150}
151
152#[must_use]
154pub fn json_record(label: &str, validity: &Validity) -> String {
155 let path = json_escape(label);
156 match validity {
157 Validity::Valid => format!(r#"{{"path":"{path}","valid":true}}"#),
158 Validity::Invalid {
159 valid_up_to,
160 error_len,
161 } => {
162 let error_len = error_len.map_or_else(|| "null".to_owned(), |len| len.to_string());
163 format!(
164 r#"{{"path":"{path}","valid":false,"valid_up_to":{valid_up_to},"error_len":{error_len}}}"#
165 )
166 },
167 }
168}
169
170#[must_use]
172pub fn text_block(findings: &[Finding]) -> String {
173 let mut out = String::new();
174 for finding in findings {
175 out.push_str(&text_line(&finding.label, &finding.validity));
176 out.push('\n');
177 }
178 out
179}
180
181#[must_use]
183pub fn json_block(findings: &[Finding]) -> String {
184 let records: Vec<String> = findings
185 .iter()
186 .map(|finding| json_record(&finding.label, &finding.validity))
187 .collect();
188 format!("[{}]\n", records.join(","))
189}
190
191pub fn build_sarif(findings: &[Finding]) -> std::result::Result<String, ReportError> {
201 let tool = ToolBuilder::new(TOOL_NAME)
202 .with_version(env!("CARGO_PKG_VERSION"))
203 .add_simple_rule(RULE_ID, "Invalid UTF-8")
204 .build();
205 let mut run = RunBuilder::new(tool);
206
207 for finding in findings {
208 let (level, message) = match finding.validity {
209 Validity::Valid => (Level::None, format!("{}: valid UTF-8", finding.label)),
210 Validity::Invalid {
211 valid_up_to,
212 error_len,
213 } => (
214 Level::Error,
215 format!(
216 "{}: invalid UTF-8 at byte {valid_up_to} ({})",
217 finding.label,
218 invalid_detail(error_len)
219 ),
220 ),
221 };
222 let result = ResultBuilder::with_text_message(message)
223 .with_rule_id(RULE_ID)
224 .with_level(level)
225 .add_file_location(path_to_uri(&finding.label), 1, 1)
228 .build();
229 run = run.add_result(result);
230 }
231
232 let log = SarifLogBuilder::with_standard_schema()
233 .add_run(run.build())
234 .build()
235 .map_err(|error| ReportError::Sarif(error.to_string()))?;
236
237 SarifValidator::strict()
238 .validate_sarif_log(&log)
239 .map_err(|error| ReportError::Sarif(error.to_string()))?;
240
241 sarif_rust::to_string_pretty(&log).map_err(|error| ReportError::Sarif(error.to_string()))
242}
243
244pub fn sarif_to_markdown(sarif_json: &str) -> std::result::Result<String, ReportError> {
252 let generator = SarifMarkdownGenerator::new(MarkdownFormat::GitHubFlavored, true);
253 let processor = ReportProcessorBuilder::new()
254 .generator(generator)
255 .content(sarif_json.to_owned())
256 .build()
257 .map_err(|error| ReportError::Markdown(error.to_string()))?;
258 let markdown = processor
259 .generate()
260 .map_err(|error| ReportError::Markdown(error.to_string()))?;
261 validate_markdown(&markdown)?;
262 Ok(markdown)
263}
264
265fn validate_markdown(markdown: &str) -> std::result::Result<(), ReportError> {
272 if markdown.trim().is_empty() {
273 return Err(ReportError::Markdown("output is empty".to_owned()));
274 }
275 if !markdown.contains('#') && !markdown.contains('|') && !markdown.contains("---") {
276 return Err(ReportError::Markdown(
277 "output missing expected structure (no headings, tables, or rules)".to_owned(),
278 ));
279 }
280 Ok(())
281}
282
// Unit tests for the formatters and the SARIF / Markdown pipeline.
//
// Expectations for the block-level formatters are built from the per-record
// helpers, so they pin structure (ordering, separators, terminators) without
// repeating message wording.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn json_escape_passes_through_plain_text() {
        assert_eq!(json_escape("plain"), "plain");
    }

    #[test]
    fn json_escape_handles_quotes_and_backslashes() {
        assert_eq!(json_escape(r#"a"b\c"#), r#"a\"b\\c"#);
    }

    #[test]
    fn json_escape_handles_control_characters() {
        assert_eq!(json_escape("line\nbreak\ttab"), "line\\nbreak\\ttab");
        // Control characters without a short form use the \u00XX notation.
        assert_eq!(json_escape("\u{0001}"), "\\u0001");
    }

    #[test]
    fn json_escape_handles_remaining_short_forms_and_hex_letters() {
        assert_eq!(json_escape("\u{0008}\u{000C}\r"), "\\b\\f\\r");
        // 0x1F exercises the a-f range of the hex digits.
        assert_eq!(json_escape("\u{001F}"), "\\u001f");
        assert_eq!(json_escape(""), "");
    }

    #[test]
    fn hex_nibble_covers_all_sixteen_digits() {
        let digits: String = (0u8..16).map(hex_nibble).collect();
        assert_eq!(digits, "0123456789abcdef");
    }

    #[test]
    fn invalid_detail_distinguishes_singular_plural_and_incomplete() {
        let one = invalid_detail(Some(1));
        let many = invalid_detail(Some(3));
        let incomplete = invalid_detail(None);
        assert!(one.contains('1'), "got: {one}");
        assert!(many.contains('3'), "got: {many}");
        assert_ne!(one, incomplete);
        assert_ne!(many, incomplete);
    }

    #[test]
    fn text_line_marks_valid_inputs() {
        let line = text_line("file.txt", &Validity::Valid);
        assert!(line.contains("OK"), "got: {line}");
        assert!(line.contains("file.txt"), "got: {line}");
    }

    #[test]
    fn text_line_marks_invalid_inputs_with_location() {
        let v = Validity::Invalid {
            valid_up_to: 3,
            error_len: Some(1),
        };
        let line = text_line("bad.bin", &v);
        assert!(line.contains("FAIL"), "got: {line}");
        assert!(line.contains("bad.bin"), "got: {line}");
        assert!(line.contains('3'), "got: {line}");
    }

    #[test]
    fn json_record_for_valid_input() {
        let rec = json_record("file.txt", &Validity::Valid);
        assert!(rec.contains(r#""valid":true"#), "got: {rec}");
        assert!(rec.contains(r#""path":"file.txt""#), "got: {rec}");
    }

    #[test]
    fn json_record_for_invalid_input() {
        let v = Validity::Invalid {
            valid_up_to: 3,
            error_len: None,
        };
        let rec = json_record("bad.bin", &v);
        assert!(rec.contains(r#""valid":false"#), "got: {rec}");
        assert!(rec.contains(r#""valid_up_to":3"#), "got: {rec}");
        assert!(rec.contains(r#""error_len":null"#), "got: {rec}");
    }

    #[test]
    fn json_record_escapes_the_path() {
        let rec = json_record(r#"a"b"#, &Validity::Valid);
        assert!(rec.contains(r#""path":"a\"b""#), "got: {rec}");
    }

    /// One valid and one invalid finding, shared by the block-level tests.
    fn sample_findings() -> Vec<Finding> {
        vec![
            Finding {
                label: "ok.txt".to_owned(),
                validity: Validity::Valid,
            },
            Finding {
                label: "bad.bin".to_owned(),
                validity: Validity::Invalid {
                    valid_up_to: 3,
                    error_len: Some(1),
                },
            },
        ]
    }

    #[test]
    fn text_block_emits_one_terminated_line_per_finding() {
        let findings = sample_findings();
        let expected: String = findings
            .iter()
            .map(|finding| format!("{}\n", text_line(&finding.label, &finding.validity)))
            .collect();
        assert_eq!(text_block(&findings), expected);
        assert_eq!(text_block(&[]), "");
    }

    #[test]
    fn json_block_wraps_comma_separated_records_in_an_array() {
        let findings = sample_findings();
        let first = json_record(&findings[0].label, &findings[0].validity);
        let second = json_record(&findings[1].label, &findings[1].validity);
        assert_eq!(json_block(&findings), format!("[{first},{second}]\n"));
        assert_eq!(json_block(&[]), "[]\n");
    }

    #[test]
    fn build_sarif_is_valid_and_parses() {
        let json = build_sarif(&sample_findings()).expect("sarif builds & validates");
        assert!(json.contains("2.1.0"), "expected schema version: {json}");
        assert!(json.contains(RULE_ID), "expected rule id: {json}");
        // The emitted document must round-trip through the parser.
        let parsed = sarif_rust::from_str(&json);
        assert!(parsed.is_ok(), "SARIF should re-parse: {parsed:?}");
    }

    #[test]
    fn markdown_from_sarif_has_structure() {
        let json = build_sarif(&sample_findings()).unwrap();
        let md = sarif_to_markdown(&json).expect("markdown generates");
        assert!(
            md.contains('#') || md.contains('|') || md.contains("---"),
            "markdown should be structured: {md}"
        );
    }

    #[test]
    fn validate_markdown_rejects_empty() {
        assert!(validate_markdown(" \n ").is_err());
    }

    #[test]
    fn validate_markdown_rejects_unstructured_text() {
        assert!(validate_markdown("just some words").is_err());
    }

    #[test]
    fn validate_markdown_accepts_any_structural_marker() {
        assert!(validate_markdown("# Title").is_ok());
        assert!(validate_markdown("a | b").is_ok());
        assert!(validate_markdown("above\n---\nbelow").is_ok());
    }

    #[test]
    fn build_sarif_handles_paths_with_spaces_and_unicode() {
        // Labels with spaces, non-ASCII text and URI-reserved characters must
        // still produce a log that passes strict validation.
        let findings = vec![
            Finding {
                label: "my café/файл .txt".to_owned(),
                validity: Validity::Invalid {
                    valid_up_to: 0,
                    error_len: Some(1),
                },
            },
            Finding {
                label: "C:/weird?name*.bin".to_owned(),
                validity: Validity::Valid,
            },
        ];
        let json = build_sarif(&findings).expect("tricky paths must still validate");
        assert!(sarif_rust::from_str(&json).is_ok());
        // Only the URI is percent-encoded; the message keeps the raw label.
        assert!(json.contains("my café/файл .txt"), "message lost the label");
    }

    #[test]
    fn path_to_uri_encodes_specials_keeps_separators() {
        assert_eq!(path_to_uri("a/b.txt"), "a/b.txt");
        assert_eq!(path_to_uri("a b"), "a%20b");
        assert_eq!(path_to_uri("café"), "caf%C3%A9");
        assert_eq!(path_to_uri(""), ".");
    }

    #[test]
    fn path_to_uri_encodes_reserved_characters_in_uppercase_hex() {
        assert_eq!(path_to_uri("C:/weird?name*.bin"), "C%3A/weird%3Fname%2A.bin");
        assert_eq!(path_to_uri("-._~"), "-._~");
    }
}