#[cfg(feature = "wasm")]
use wasm_bindgen::prelude::*;
/// Classifies `content` as `"json"`, `"markdown"`, or `"text"`.
///
/// The checks run in this order:
///
/// 1. If the whole input parses as a JSON value, the result is `"json"`.
/// 2. Otherwise five independent Markdown signals are evaluated; when at
///    least two of them fire the result is `"markdown"`:
///    - a line that, after leading spaces, starts with one to six `#`
///      followed by a space;
///    - a line that, after leading whitespace, starts with `"- "` or `"* "`;
///    - a line that, after leading whitespace, is longer than two bytes,
///      starts with an ASCII digit and contains `". "` somewhere;
///    - a triple backtick anywhere in the input;
///    - a `[...](...)`-shaped link as recognised by `contains_markdown_link`.
/// 3. Anything else is `"text"`.
#[cfg_attr(feature = "wasm", wasm_bindgen)]
pub fn detect_format(content: &str) -> String {
    /// ATX heading markers, levels one through six.
    const HEADING_PREFIXES: [&str; 6] = ["# ", "## ", "### ", "#### ", "##### ", "###### "];
    /// Minimum number of distinct signals needed to call the input Markdown.
    const MARKDOWN_THRESHOLD: usize = 2;

    if serde_json::from_str::<serde_json::Value>(content).is_ok() {
        return "json".to_string();
    }

    // Only spaces (not tabs) are skipped before a heading marker.
    let has_heading = content.lines().any(|line| {
        let rest = line.trim_start_matches(' ');
        HEADING_PREFIXES.iter().any(|prefix| rest.starts_with(*prefix))
    });

    let has_bullet = content
        .lines()
        .map(str::trim_start)
        .any(|line| line.starts_with("- ") || line.starts_with("* "));

    // Deliberately loose: the ". " may appear anywhere on the line.
    let has_numbered_item = content.lines().map(str::trim_start).any(|line| {
        line.len() > 2
            && line.chars().next().map_or(false, |first| first.is_ascii_digit())
            && line.contains(". ")
    });

    let has_code_fence = content.contains("```");
    let has_link = contains_markdown_link(content);

    let signals = [
        has_heading,
        has_bullet,
        has_numbered_item,
        has_code_fence,
        has_link,
    ];
    let score = signals.iter().filter(|&&fired| fired).count();

    let format = if score >= MARKDOWN_THRESHOLD {
        "markdown"
    } else {
        "text"
    };
    format.to_string()
}
/// Reports whether `s` contains something shaped like a Markdown inline link.
///
/// A match requires a `[`, whose first following `]` is immediately followed
/// by `(`, with a `)` occurring somewhere later in the input. The text between
/// the delimiters is not validated.
///
/// The scan is a single pass over the bytes. All delimiters are ASCII, so
/// working on bytes cannot mis-detect them inside multi-byte UTF-8 sequences.
fn contains_markdown_link(s: &str) -> bool {
    let bytes = s.as_bytes();

    // Without any `)` no link target can ever be closed.
    let last_close_paren = match bytes.iter().rposition(|&b| b == b')') {
        Some(idx) => idx,
        None => return false,
    };

    // True while a `[` has been seen since the most recent `]`; only such a
    // `]` is the first closing bracket after some opening bracket.
    let mut open_pending = false;
    for (idx, &byte) in bytes.iter().enumerate() {
        match byte {
            b'[' => open_pending = true,
            b']' => {
                let opens_target = bytes.get(idx + 1) == Some(&b'(');
                // The `)` must come after the `(` located at `idx + 1`.
                if open_pending && opens_target && idx + 1 < last_close_paren {
                    return true;
                }
                open_pending = false;
            }
            _ => {}
        }
    }
    false
}
/// Reports whether the start of `content` contains low control bytes.
///
/// Only the first 8192 bytes are inspected. A byte in the range
/// `0x00..=0x08` counts as binary; tab, line feed and carriage return are
/// above that range and are therefore not flagged.
///
/// The scan works on bytes rather than on a `str` slice, so it never panics
/// when the 8192-byte cut-off lands inside a multi-byte UTF-8 character.
/// Every flagged value is single-byte ASCII, so the result matches a
/// per-character scan of the same prefix.
#[cfg_attr(feature = "wasm", wasm_bindgen)]
pub fn contains_binary_content(content: &str) -> bool {
    const SCAN_LIMIT_BYTES: usize = 8192;

    content
        .as_bytes()
        .iter()
        .take(SCAN_LIMIT_BYTES)
        .any(|&byte| byte <= 8)
}
/// Returns the 1-based number of the first line longer than `max_chars`,
/// or `0` when no line exceeds the limit.
///
/// Lines are separated by `'\n'` only; a `'\r'` before the newline counts
/// towards the line's length. Despite the parameter name, length is measured
/// in UTF-8 bytes, not in characters. Text after the final newline (possibly
/// empty) is treated as a line of its own.
#[cfg_attr(feature = "wasm", wasm_bindgen)]
pub fn find_overlong_line(content: &str, max_chars: u32) -> u32 {
    let limit = max_chars as usize;

    // Pair each newline-delimited segment with its 1-based line number and
    // stop at the first segment whose byte length is over the limit.
    (1u32..)
        .zip(content.split('\n'))
        .find(|(_, line)| line.len() > limit)
        .map_or(0, |(number, _)| number)
}
/// Default maximum content size in bytes: 5 MiB (5 * 2^20 = 5,242,880).
pub const DEFAULT_MAX_CONTENT_BYTES: u64 = 5 << 20;
/// Default maximum line length in bytes: 64 KiB (64 * 2^10 = 65,536).
pub const DEFAULT_MAX_LINE_BYTES: u32 = 64 << 10;
/// Returns the value of `DEFAULT_MAX_CONTENT_BYTES`.
///
/// The function carries the `wasm_bindgen` attribute when the `wasm` feature
/// is enabled.
#[cfg_attr(feature = "wasm", wasm_bindgen)]
pub fn default_max_content_bytes() -> u64 {
DEFAULT_MAX_CONTENT_BYTES
}
/// Returns the value of `DEFAULT_MAX_LINE_BYTES`.
///
/// The function carries the `wasm_bindgen` attribute when the `wasm` feature
/// is enabled.
#[cfg_attr(feature = "wasm", wasm_bindgen)]
pub fn default_max_line_bytes() -> u32 {
DEFAULT_MAX_LINE_BYTES
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Asserts that `detect_format` classifies every sample as `expected`.
    fn assert_all_detected_as(expected: &str, samples: &[&str]) {
        for sample in samples {
            assert_eq!(detect_format(sample), expected, "input: {:?}", sample);
        }
    }

    // --- detect_format -----------------------------------------------------

    #[test]
    fn detect_format_json() {
        assert_all_detected_as("json", &[r#"{"a":1}"#, "null", "[1,2,3]"]);
    }

    #[test]
    fn detect_format_markdown() {
        // Each sample triggers exactly two different Markdown signals.
        assert_all_detected_as(
            "markdown",
            &[
                "# Title\n- item",
                "## Heading\n```code```",
                "1. first\n[link](url)",
            ],
        );
    }

    #[test]
    fn detect_format_text() {
        // The last sample fires a single signal, which is below the threshold.
        assert_all_detected_as(
            "text",
            &["Hello world", "just plain text here", "# Only one signal"],
        );
    }

    // NOTE(review): the `byte_identity_vec*` names suggest test vectors shared
    // with another implementation -- confirm before changing these inputs.

    #[test]
    fn byte_identity_vec1_json() {
        let input = r#"{"a":1}"#;
        assert_eq!(detect_format(input), "json");
    }

    #[test]
    fn byte_identity_vec2_markdown() {
        let input = "# Title\n- item";
        assert_eq!(detect_format(input), "markdown");
    }

    #[test]
    fn byte_identity_vec3_text() {
        let input = "Hello";
        assert_eq!(detect_format(input), "text");
    }

    // --- contains_binary_content -------------------------------------------

    #[test]
    fn binary_detects_null_byte() {
        assert!(contains_binary_content("hello\x00"));
    }

    #[test]
    fn binary_detects_control_char_0x08() {
        assert!(contains_binary_content("hello\x08"));
    }

    #[test]
    fn binary_clean_content() {
        let clean = "hello world\nnewlines\ttabs";
        assert!(!contains_binary_content(clean));
    }

    #[test]
    fn binary_only_scans_first_8kb() {
        // The NUL sits at byte offset 8193, past the scanned prefix.
        let content = format!("{}\x00", "a".repeat(8193));
        assert!(!contains_binary_content(&content));
    }

    // --- find_overlong_line ------------------------------------------------

    #[test]
    fn overlong_no_violation() {
        let content = ["short", "lines", "here"].join("\n");
        assert_eq!(find_overlong_line(&content, 100), 0);
    }

    #[test]
    fn overlong_first_line() {
        assert_eq!(find_overlong_line(&"a".repeat(101), 100), 1);
    }

    #[test]
    fn overlong_second_line() {
        let oversized = "b".repeat(101);
        let content = ["ok", oversized.as_str(), "ok"].join("\n");
        assert_eq!(find_overlong_line(&content, 100), 2);
    }

    #[test]
    fn overlong_empty() {
        assert_eq!(find_overlong_line("", 100), 0);
    }
}