Skip to main content

harness_read/
binary.rs

1use std::path::Path;
2
3use crate::constants::BINARY_EXTENSIONS;
4
5pub fn is_binary_by_extension(filepath: &str) -> bool {
6    let ext = Path::new(filepath)
7        .extension()
8        .and_then(|s| s.to_str())
9        .map(|s| format!(".{}", s.to_ascii_lowercase()));
10    match ext {
11        Some(e) => BINARY_EXTENSIONS.contains(&e.as_str()),
12        None => false,
13    }
14}
15
/// Heuristically decides whether a byte sample looks like binary data.
///
/// * An empty sample is treated as not binary.
/// * Any NUL byte marks the sample as binary outright.
/// * Otherwise the sample is binary when more than 30% of its bytes are
///   control bytes below 32, not counting the range 9..=13
///   (tab, LF, VT, FF, CR).
pub fn is_binary_by_content(sample: &[u8]) -> bool {
    if sample.is_empty() {
        return false;
    }
    if sample.contains(&0) {
        return true;
    }

    let control_bytes = sample
        .iter()
        .filter(|&&byte| matches!(byte, 0..=8 | 14..=31))
        .count();
    let ratio = control_bytes as f64 / sample.len() as f64;
    ratio > 0.3
}
31
32pub fn is_binary(filepath: &str, sample: &[u8]) -> bool {
33    is_binary_by_extension(filepath) || is_binary_by_content(sample)
34}
35
/// Returns `true` for any MIME string beginning with `image/`, except
/// exactly `image/svg+xml`. The comparison is case-sensitive.
pub fn is_image_mime(mime: &str) -> bool {
    match mime.strip_prefix("image/") {
        Some(subtype) => subtype != "svg+xml",
        None => false,
    }
}
39
/// Returns `true` only when `mime` is exactly `application/pdf`
/// (case-sensitive, no parameters).
pub fn is_pdf_mime(mime: &str) -> bool {
    matches!(mime, "application/pdf")
}
43
/// Minimal MIME-from-extension guesser. We only need to distinguish
/// attachments (images/PDF) from text from binary; the TS version uses
/// mime-type lib but the subset of extensions we care about is tiny.
///
/// The extension of `path` is matched case-insensitively (ASCII only).
/// Paths with no extension, or with an extension not listed below, map to
/// `application/octet-stream`.
pub fn mime_for(path: &str) -> String {
    // Lowercase only the extension rather than the whole path: ASCII
    // lowercasing cannot move a `.` or a path separator, so the extension
    // found is the same and a full-path copy is avoided.
    let ext = Path::new(path)
        .extension()
        .and_then(|s| s.to_str())
        .map(|s| s.to_ascii_lowercase());

    // A missing extension becomes "" and falls through to the default arm.
    match ext.as_deref().unwrap_or("") {
        "png" => "image/png",
        "jpg" | "jpeg" => "image/jpeg",
        "gif" => "image/gif",
        "bmp" => "image/bmp",
        "webp" => "image/webp",
        "tif" | "tiff" => "image/tiff",
        "ico" => "image/x-icon",
        "svg" => "image/svg+xml",
        "pdf" => "application/pdf",
        "json" => "application/json",
        "xml" => "application/xml",
        "html" | "htm" => "text/html",
        "css" => "text/css",
        "js" => "application/javascript",
        "ts" | "tsx" | "md" | "txt" | "csv" => "text/plain",
        _ => "application/octet-stream",
    }
    .to_string()
}
73}