1use crate::cli::InputFormat;
2
3pub fn detect_format(input: &str) -> InputFormat {
4 let trimmed = input.trim();
5
6 if trimmed.is_empty() {
7 return InputFormat::Text;
8 }
9
10 if trimmed.starts_with('{') {
12 return InputFormat::Json;
13 }
14
15 if trimmed.starts_with('[') {
17 let first_line = trimmed.lines().next().unwrap_or("").trim();
19 let is_toml_section = (first_line.starts_with("[[")
20 && first_line.ends_with("]]")
21 && first_line.len() > 4
22 && first_line[2..first_line.len() - 2]
23 .chars()
24 .all(|c| c.is_ascii_alphanumeric() || c == '_' || c == '-' || c == '.'))
25 || (first_line.starts_with('[')
26 && first_line.ends_with(']')
27 && !first_line.starts_with("[[")
28 && first_line[1..first_line.len() - 1]
29 .chars()
30 .all(|c| c.is_ascii_alphanumeric() || c == '_' || c == '-' || c == '.'));
31
32 if !is_toml_section {
33 return InputFormat::Json;
34 }
35 }
37
38 let lines: Vec<&str> = trimmed.lines().collect();
39
40 if looks_like_headers(&lines) {
42 return InputFormat::Headers;
43 }
44
45 if looks_like_toml(&lines) {
47 return InputFormat::Toml;
48 }
49
50 if looks_like_logfmt(&lines) {
52 return InputFormat::Logfmt;
53 }
54
55 if looks_like_env(&lines) {
57 return InputFormat::Env;
58 }
59
60 if looks_like_csv(&lines) {
62 return InputFormat::Csv;
63 }
64
65 if looks_like_yaml(&lines) {
67 return InputFormat::Yaml;
68 }
69
70 InputFormat::Text
71}
72
/// Heuristic for HTTP-style header blocks.
///
/// An optional leading `HTTP/` status line is skipped and blank lines are
/// ignored. At least two lines must remain, and more than 70% of them must have
/// the shape `Key:` where the key is non-empty and made only of ASCII letters
/// and `-`. On top of that, either some line has a `-` before its first colon,
/// or more than 70% of the lines start with an ASCII uppercase letter.
fn looks_like_headers(lines: &[&str]) -> bool {
    /// Text before the first `:` of `line`, if the line has a colon at all.
    fn header_key(line: &str) -> Option<&str> {
        line.split_once(':').map(|(key, _)| key)
    }

    if lines.len() < 2 {
        return false;
    }

    // Skip the status line when present.
    let skip = usize::from(lines[0].starts_with("HTTP/"));
    let candidates: Vec<&str> = lines
        .iter()
        .skip(skip)
        .copied()
        .filter(|line| !line.trim().is_empty())
        .collect();

    let total = candidates.len();
    if total < 2 {
        return false;
    }

    let mut well_formed = 0usize;
    let mut capitalised = 0usize;
    let mut hyphenated = false;

    for line in &candidates {
        if let Some(key) = header_key(line) {
            if !key.is_empty() && key.chars().all(|c| c.is_ascii_alphabetic() || c == '-') {
                well_formed += 1;
            }
            // The hyphen test looks at any pre-colon text, well-formed or not.
            if key.contains('-') {
                hyphenated = true;
            }
        }
        if line.bytes().next().is_some_and(|b| b.is_ascii_uppercase()) {
            capitalised += 1;
        }
    }

    let share = |count: usize| count as f64 / total as f64;

    if share(well_formed) <= 0.7 {
        return false;
    }

    hyphenated || share(capitalised) > 0.7
}
125
126fn looks_like_toml(lines: &[&str]) -> bool {
127 let has_section = lines.iter().any(|l| {
128 let t = l.trim();
129 (t.starts_with('[') && t.ends_with(']') && !t.starts_with("[["))
130 || (t.starts_with("[[") && t.ends_with("]]"))
131 });
132
133 let has_toml_kv = lines.iter().any(|l| {
134 let t = l.trim();
135 if let Some(eq_pos) = t.find(" = ") {
137 let key = &t[..eq_pos];
138 !key.is_empty()
139 && key
140 .chars()
141 .all(|c| c.is_ascii_alphanumeric() || c == '_' || c == '-' || c == '.')
142 } else {
143 false
144 }
145 });
146
147 has_section || (has_toml_kv && !looks_like_env(lines))
148}
149
/// Heuristic for logfmt: every non-blank line must hold at least two
/// whitespace-separated tokens that contain `=`.
///
/// Returns `false` when there are no non-blank lines.
fn looks_like_logfmt(lines: &[&str]) -> bool {
    let mut saw_content = false;

    for line in lines.iter().map(|raw| raw.trim()) {
        if line.is_empty() {
            continue;
        }
        saw_content = true;

        let pair_count = line
            .split_whitespace()
            .filter(|token| token.contains('='))
            .count();
        if pair_count < 2 {
            return false;
        }
    }

    saw_content
}
170
/// Heuristic for dotenv-style input.
///
/// Blank lines and lines starting with `#` are ignored. Of the remaining
/// lines, more than 70% must be `KEY=value` (optionally prefixed with
/// `export `), where `KEY` starts with an ASCII uppercase letter or `_` and
/// consists only of ASCII alphanumerics and `_`.
fn looks_like_env(lines: &[&str]) -> bool {
    /// Whether a single trimmed line is an env-style assignment.
    fn is_assignment(line: &str) -> bool {
        let body = line.strip_prefix("export ").unwrap_or(line);
        match body.split_once('=') {
            // A valid first character implies a non-empty key, and the
            // character-class check already rules out spaces.
            Some((key, _)) => {
                key.starts_with(|c: char| c.is_ascii_uppercase() || c == '_')
                    && key.chars().all(|c| c.is_ascii_alphanumeric() || c == '_')
            }
            None => false,
        }
    }

    let mut total = 0usize;
    let mut matching = 0usize;

    for line in lines.iter().map(|raw| raw.trim()) {
        if line.is_empty() || line.starts_with('#') {
            continue;
        }
        total += 1;
        if is_assignment(line) {
            matching += 1;
        }
    }

    total != 0 && matching as f64 / total as f64 > 0.7
}
204
/// Heuristic for delimiter-separated tables (comma first, then tab).
///
/// Each line is trimmed and blank lines are dropped. At least two rows must
/// remain, and every row must contain the same non-zero number of the
/// delimiter as the first row.
fn looks_like_csv(lines: &[&str]) -> bool {
    let rows: Vec<&str> = lines
        .iter()
        .map(|raw| raw.trim())
        .filter(|row| !row.is_empty())
        .collect();

    let (header, body) = match rows.split_first() {
        Some((header, body)) if !body.is_empty() => (header, body),
        _ => return false,
    };

    // The first row fixes the expected delimiter count for all later rows.
    let uniform = |delimiter: char| {
        let expected = header.matches(delimiter).count();
        expected >= 1
            && body
                .iter()
                .all(|row| row.matches(delimiter).count() == expected)
    };

    uniform(',') || uniform('\t')
}
228
/// Heuristic for YAML documents.
///
/// Returns `true` immediately when the first line is the `---` document
/// marker. Otherwise a non-blank, non-comment line counts as YAML-like when it
/// is a block-sequence item (starts with `- `) or a mapping entry (its first
/// `:` is at the end of the line or followed by a space). The input matches
/// when more than half of all non-blank lines are YAML-like. Comment lines are
/// never YAML-like but still count toward the total.
fn looks_like_yaml(lines: &[&str]) -> bool {
    if lines.first().is_some_and(|line| line.trim() == "---") {
        return true;
    }

    let mut non_empty = 0usize;
    let mut yaml_like = 0usize;

    for line in lines.iter().map(|raw| raw.trim()) {
        if line.is_empty() {
            continue;
        }
        non_empty += 1;

        if line.starts_with('#') {
            continue;
        }

        // The list-item test is made independently of the colon test, so an
        // item whose value holds a colon (`- http://host/x`, `- 12:30`) still
        // counts as a sequence entry.
        let is_sequence_item = line.starts_with("- ");
        let is_mapping_entry = line
            .split_once(':')
            .is_some_and(|(_, rest)| rest.is_empty() || rest.starts_with(' '));

        if is_sequence_item || is_mapping_entry {
            yaml_like += 1;
        }
    }

    non_empty > 0 && yaml_like as f64 / non_empty as f64 > 0.5
}
260
#[cfg(test)]
mod tests {
    use super::*;

    /// Asserts that `detect_format` classifies `input` as `expected`.
    #[track_caller]
    fn assert_detects(input: &str, expected: InputFormat) {
        assert_eq!(detect_format(input), expected);
    }

    // --- JSON ---------------------------------------------------------------

    #[test]
    fn detect_json_object() {
        assert_detects("{\"a\": 1}", InputFormat::Json);
    }

    #[test]
    fn detect_json_array() {
        assert_detects("[1, 2, 3]", InputFormat::Json);
    }

    #[test]
    fn detect_json_whitespace() {
        assert_detects(" \n {\"key\": \"val\"} ", InputFormat::Json);
    }

    // --- YAML ---------------------------------------------------------------

    #[test]
    fn detect_yaml_document() {
        assert_detects("---\nname: Alice\nage: 30", InputFormat::Yaml);
    }

    #[test]
    fn detect_yaml_kv() {
        assert_detects("name: Alice\nage: 30\ncity: NYC", InputFormat::Yaml);
    }

    // --- TOML ---------------------------------------------------------------

    #[test]
    fn detect_toml_with_section() {
        assert_detects(
            "[package]\nname = \"pick\"\nversion = \"0.1.0\"",
            InputFormat::Toml,
        );
    }

    #[test]
    fn detect_toml_array_of_tables() {
        assert_detects(
            "[[items]]\nname = \"a\"\n\n[[items]]\nname = \"b\"",
            InputFormat::Toml,
        );
    }

    // --- env ----------------------------------------------------------------

    #[test]
    fn detect_env() {
        assert_detects(
            "DATABASE_URL=postgres://localhost/db\nPORT=3000\nDEBUG=true",
            InputFormat::Env,
        );
    }

    #[test]
    fn detect_env_with_export() {
        assert_detects(
            "export DATABASE_URL=postgres://localhost/db\nexport PORT=3000",
            InputFormat::Env,
        );
    }

    #[test]
    fn detect_env_with_comments() {
        assert_detects(
            "# Database config\nDATABASE_URL=postgres://localhost/db\nPORT=3000",
            InputFormat::Env,
        );
    }

    // --- HTTP headers -------------------------------------------------------

    #[test]
    fn detect_headers() {
        assert_detects(
            "Content-Type: application/json\nX-Request-Id: abc123\nCache-Control: no-cache",
            InputFormat::Headers,
        );
    }

    #[test]
    fn detect_headers_with_status() {
        assert_detects(
            "HTTP/1.1 200 OK\nContent-Type: text/html\nContent-Length: 1234",
            InputFormat::Headers,
        );
    }

    // --- logfmt -------------------------------------------------------------

    #[test]
    fn detect_logfmt() {
        assert_detects(
            "level=info msg=\"request handled\" duration=0.5s status=200",
            InputFormat::Logfmt,
        );
    }

    #[test]
    fn detect_logfmt_multiline() {
        assert_detects(
            "level=info msg=hello ts=123\nlevel=error msg=fail ts=456",
            InputFormat::Logfmt,
        );
    }

    // --- CSV / TSV ----------------------------------------------------------

    #[test]
    fn detect_csv() {
        assert_detects("name,age,city\nAlice,30,NYC\nBob,25,LA", InputFormat::Csv);
    }

    #[test]
    fn detect_tsv() {
        assert_detects(
            "name\tage\tcity\nAlice\t30\tNYC\nBob\t25\tLA",
            InputFormat::Csv,
        );
    }

    // --- plain-text fallback ------------------------------------------------

    #[test]
    fn detect_empty_input() {
        assert_detects("", InputFormat::Text);
        assert_detects(" \n ", InputFormat::Text);
    }

    #[test]
    fn detect_plain_text() {
        assert_detects("just some random text here", InputFormat::Text);
    }
}