use crate::cli::InputFormat;
pub fn detect_format(input: &str) -> InputFormat {
let trimmed = input.trim();
if trimmed.is_empty() {
return InputFormat::Text;
}
if trimmed.starts_with('{') {
return InputFormat::Json;
}
if trimmed.starts_with('[') {
let first_line = trimmed.lines().next().unwrap_or("").trim();
let is_toml_section = (first_line.starts_with("[[")
&& first_line.ends_with("]]")
&& first_line.len() > 4
&& first_line[2..first_line.len() - 2]
.chars()
.all(|c| c.is_ascii_alphanumeric() || c == '_' || c == '-' || c == '.'))
|| (first_line.starts_with('[')
&& first_line.ends_with(']')
&& !first_line.starts_with("[[")
&& first_line[1..first_line.len() - 1]
.chars()
.all(|c| c.is_ascii_alphanumeric() || c == '_' || c == '-' || c == '.'));
if !is_toml_section {
return InputFormat::Json;
}
}
let lines: Vec<&str> = trimmed.lines().collect();
if looks_like_headers(&lines) {
return InputFormat::Headers;
}
if looks_like_toml(&lines) {
return InputFormat::Toml;
}
if looks_like_logfmt(&lines) {
return InputFormat::Logfmt;
}
if looks_like_env(&lines) {
return InputFormat::Env;
}
if looks_like_csv(&lines) {
return InputFormat::Csv;
}
if looks_like_yaml(&lines) {
return InputFormat::Yaml;
}
InputFormat::Text
}
fn looks_like_headers(lines: &[&str]) -> bool {
if lines.len() < 2 {
return false;
}
let is_header_line = |line: &str| -> bool {
if let Some(colon_pos) = line.find(':') {
let key = &line[..colon_pos];
!key.is_empty() && key.chars().all(|c| c.is_ascii_alphabetic() || c == '-')
} else {
false
}
};
let start = if lines[0].starts_with("HTTP/") { 1 } else { 0 };
let relevant: Vec<&&str> = lines[start..]
.iter()
.filter(|l| !l.trim().is_empty())
.collect();
if relevant.len() < 2 {
return false;
}
let header_count = relevant.iter().filter(|l| is_header_line(l)).count();
if (header_count as f64 / relevant.len() as f64) <= 0.7 {
return false;
}
let has_hyphen_key = relevant.iter().any(|line| {
if let Some(colon_pos) = line.find(':') {
line[..colon_pos].contains('-')
} else {
false
}
});
let uppercase_keys = relevant
.iter()
.filter(|line| {
line.as_bytes()
.first()
.is_some_and(|b| b.is_ascii_uppercase())
})
.count();
has_hyphen_key || uppercase_keys as f64 / relevant.len() as f64 > 0.7
}
fn looks_like_toml(lines: &[&str]) -> bool {
let has_section = lines.iter().any(|l| {
let t = l.trim();
(t.starts_with('[') && t.ends_with(']') && !t.starts_with("[["))
|| (t.starts_with("[[") && t.ends_with("]]"))
});
let has_toml_kv = lines.iter().any(|l| {
let t = l.trim();
if let Some(eq_pos) = t.find(" = ") {
let key = &t[..eq_pos];
!key.is_empty()
&& key
.chars()
.all(|c| c.is_ascii_alphanumeric() || c == '_' || c == '-' || c == '.')
} else {
false
}
});
has_section || (has_toml_kv && !looks_like_env(lines))
}
fn looks_like_logfmt(lines: &[&str]) -> bool {
let relevant: Vec<&str> = lines
.iter()
.map(|l| l.trim())
.filter(|l| !l.is_empty())
.collect();
if relevant.is_empty() {
return false;
}
relevant.iter().all(|line| {
let pairs: Vec<&str> = line
.split_whitespace()
.filter(|token| token.contains('='))
.collect();
pairs.len() >= 2
})
}
fn looks_like_env(lines: &[&str]) -> bool {
let relevant: Vec<&str> = lines
.iter()
.map(|l| l.trim())
.filter(|l| !l.is_empty() && !l.starts_with('#'))
.collect();
if relevant.is_empty() {
return false;
}
let env_count = relevant
.iter()
.filter(|line| {
let line = line.strip_prefix("export ").unwrap_or(line);
if let Some(eq_pos) = line.find('=') {
let key = &line[..eq_pos];
!key.is_empty()
&& !key.contains(' ')
&& key.chars().all(|c| c.is_ascii_alphanumeric() || c == '_')
&& key
.chars()
.next()
.is_some_and(|c| c.is_ascii_uppercase() || c == '_')
} else {
false
}
})
.count();
env_count as f64 / relevant.len() as f64 > 0.7
}
fn looks_like_csv(lines: &[&str]) -> bool {
let non_empty: Vec<&str> = lines
.iter()
.map(|l| l.trim())
.filter(|l| !l.is_empty())
.collect();
if non_empty.len() < 2 {
return false;
}
let comma_counts: Vec<usize> = non_empty.iter().map(|l| l.matches(',').count()).collect();
if comma_counts[0] >= 1 && comma_counts.iter().all(|&c| c == comma_counts[0]) {
return true;
}
let tab_counts: Vec<usize> = non_empty.iter().map(|l| l.matches('\t').count()).collect();
tab_counts[0] >= 1 && tab_counts.iter().all(|&c| c == tab_counts[0])
}
fn looks_like_yaml(lines: &[&str]) -> bool {
if lines.is_empty() {
return false;
}
let first = lines[0].trim();
if first == "---" {
return true;
}
let yaml_like = lines
.iter()
.filter(|l| {
let t = l.trim();
if t.is_empty() || t.starts_with('#') {
return false;
}
if let Some(colon_pos) = t.find(':') {
let after_colon = &t[colon_pos + 1..];
after_colon.is_empty() || after_colon.starts_with(' ')
} else {
t.starts_with("- ") }
})
.count();
let non_empty = lines.iter().filter(|l| !l.trim().is_empty()).count();
non_empty > 0 && yaml_like as f64 / non_empty as f64 > 0.5
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn detect_json_object() {
assert_eq!(detect_format("{\"a\": 1}"), InputFormat::Json);
}
#[test]
fn detect_json_array() {
assert_eq!(detect_format("[1, 2, 3]"), InputFormat::Json);
}
#[test]
fn detect_json_whitespace() {
assert_eq!(
detect_format(" \n {\"key\": \"val\"} "),
InputFormat::Json
);
}
#[test]
fn detect_yaml_document() {
assert_eq!(
detect_format("---\nname: Alice\nage: 30"),
InputFormat::Yaml
);
}
#[test]
fn detect_yaml_kv() {
assert_eq!(
detect_format("name: Alice\nage: 30\ncity: NYC"),
InputFormat::Yaml
);
}
#[test]
fn detect_toml_with_section() {
assert_eq!(
detect_format("[package]\nname = \"pick\"\nversion = \"0.1.0\""),
InputFormat::Toml
);
}
#[test]
fn detect_toml_array_of_tables() {
assert_eq!(
detect_format("[[items]]\nname = \"a\"\n\n[[items]]\nname = \"b\""),
InputFormat::Toml
);
}
#[test]
fn detect_env() {
assert_eq!(
detect_format("DATABASE_URL=postgres://localhost/db\nPORT=3000\nDEBUG=true"),
InputFormat::Env
);
}
#[test]
fn detect_env_with_export() {
assert_eq!(
detect_format("export DATABASE_URL=postgres://localhost/db\nexport PORT=3000"),
InputFormat::Env
);
}
#[test]
fn detect_env_with_comments() {
assert_eq!(
detect_format("# Database config\nDATABASE_URL=postgres://localhost/db\nPORT=3000"),
InputFormat::Env
);
}
#[test]
fn detect_headers() {
assert_eq!(
detect_format(
"Content-Type: application/json\nX-Request-Id: abc123\nCache-Control: no-cache"
),
InputFormat::Headers
);
}
#[test]
fn detect_headers_with_status() {
assert_eq!(
detect_format("HTTP/1.1 200 OK\nContent-Type: text/html\nContent-Length: 1234"),
InputFormat::Headers
);
}
#[test]
fn detect_logfmt() {
assert_eq!(
detect_format("level=info msg=\"request handled\" duration=0.5s status=200"),
InputFormat::Logfmt
);
}
#[test]
fn detect_logfmt_multiline() {
assert_eq!(
detect_format("level=info msg=hello ts=123\nlevel=error msg=fail ts=456"),
InputFormat::Logfmt
);
}
#[test]
fn detect_csv() {
assert_eq!(
detect_format("name,age,city\nAlice,30,NYC\nBob,25,LA"),
InputFormat::Csv
);
}
#[test]
fn detect_tsv() {
assert_eq!(
detect_format("name\tage\tcity\nAlice\t30\tNYC\nBob\t25\tLA"),
InputFormat::Csv
);
}
#[test]
fn detect_empty_input() {
assert_eq!(detect_format(""), InputFormat::Text);
assert_eq!(detect_format(" \n "), InputFormat::Text);
}
#[test]
fn detect_plain_text() {
assert_eq!(
detect_format("just some random text here"),
InputFormat::Text
);
}
}