Skip to main content

ras_filesystem/application/
csv_normalize.rs

1#[must_use]
2pub fn normalize_csv(raw: &str) -> String {
3    let mut out_lines: Vec<String> = Vec::new();
4    let mut leading_blank = true;
5    for line in raw.lines() {
6        let trimmed = line.trim_end();
7        if trimmed.is_empty() && leading_blank {
8            continue;
9        }
10        leading_blank = false;
11        out_lines.push(normalize_row(trimmed));
12    }
13    while out_lines.last().is_some_and(|l| l.is_empty()) {
14        out_lines.pop();
15    }
16    out_lines.join("\n")
17}
18
19fn normalize_row(line: &str) -> String {
20    let fields = split_respecting_quotes(line);
21    fields
22        .into_iter()
23        .map(quote_if_needed)
24        .collect::<Vec<_>>()
25        .join(",")
26}
27
/// Splits one CSV row into its fields, honouring double-quoted sections.
///
/// Rules applied while scanning the row character by character:
/// - a `"` opens a quoted section only if the current field has not already
///   been opened by a quote and holds nothing but whitespace so far; that
///   leading whitespace is discarded;
/// - inside a quoted section, `""` yields a literal `"` and a single `"` closes
///   the section; commas inside the section are ordinary characters;
/// - any other `"` is kept as a literal character;
/// - a `,` outside a quoted section ends the current field.
///
/// Every field is whitespace-trimmed before it is stored (this also trims
/// whitespace that was inside quotes). The result always contains at least one
/// field; an empty row yields a single empty string. An unterminated quote is
/// tolerated: the rest of the row becomes part of that field.
fn split_respecting_quotes(line: &str) -> Vec<String> {
    let mut out: Vec<String> = Vec::new();
    let mut current = String::new();
    let mut inside = false;
    // True once the current field has been opened by a quote; reset per field.
    let mut opened = false;
    let mut stream = line.chars().peekable();

    while let Some(ch) = stream.next() {
        match (ch, inside) {
            ('"', true) => {
                // A doubled quote is an escaped quote; a lone one closes the section.
                if stream.next_if_eq(&'"').is_some() {
                    current.push('"');
                } else {
                    inside = false;
                }
            }
            ('"', false) if !opened && current.trim().is_empty() => {
                inside = true;
                opened = true;
                current.clear();
            }
            ('"', false) => current.push('"'),
            (',', false) => {
                out.push(current.trim().to_owned());
                current.clear();
                opened = false;
            }
            (other, _) => current.push(other),
        }
    }

    // Whatever remains after the last separator is the final field.
    out.push(current.trim().to_owned());
    out
}
61
/// Wraps `field` in double quotes when it cannot be written bare in CSV.
///
/// A field needs quoting if it contains a comma, a double quote, a line feed
/// or a carriage return. When quoting, every embedded `"` is doubled. Fields
/// that need no quoting are returned unchanged without reallocating.
fn quote_if_needed(field: String) -> String {
    // A bare `\r` is a line break to many CSV readers, so it must be quoted
    // just like `\n`; otherwise the field could be read back as two records.
    let needs_quoting = field
        .chars()
        .any(|c| matches!(c, ',' | '"' | '\n' | '\r'));
    if !needs_quoting {
        return field;
    }
    let escaped = field.replace('"', "\"\"");
    format!("\"{escaped}\"")
}