reovim_module_codec_csv/
codec.rs1use std::fmt::Write;
8
9use {
10 reovim_driver_annotation::{Annotation, AnnotationKind, AnnotationPayload, AnnotationTarget},
11 reovim_driver_codec::{CodecError, CodecMetadata, ContentType, DecodeResult},
12};
13
/// Annotation kind name attached to the first rendered line when that line
/// was detected as a CSV header row.
pub const CSV_HEADER_KIND: &str = "content.csv.header";

/// Annotation kind name attached to every rendered line; the accompanying
/// payload carries the number of fields in that row.
pub const CSV_COLUMN_KIND: &str = "content.csv.column";
19
/// Codec for delimiter-separated text that is shown as an aligned table.
pub struct CsvCodec {
    /// Field delimiter byte used when parsing, and as the fallback delimiter
    /// when encoding without a `delimiter` metadata entry.
    delimiter: u8,
    /// Content type string recorded in the metadata of decoded content.
    content_type: &'static str,
}

impl CsvCodec {
    /// Builds a codec for the given field `delimiter` and `content_type`.
    ///
    /// Being a `const fn`, this can be used to initialise constants and statics.
    #[must_use]
    pub const fn new(delimiter: u8, content_type: &'static str) -> Self {
        Self { delimiter, content_type }
    }

    /// Returns the delimiter byte this codec was constructed with.
    #[must_use]
    pub const fn delimiter(&self) -> u8 {
        self.delimiter
    }
}
47
48impl reovim_driver_codec::ContentCodec for CsvCodec {
49 #[cfg_attr(coverage_nightly, coverage(off))]
50 fn decode(&self, raw: &[u8]) -> Result<DecodeResult, CodecError> {
51 let text = std::str::from_utf8(raw)
52 .map_err(|e| CodecError::Other(format!("CSV decode: invalid UTF-8: {e}")))?;
53
54 let (rows, has_header) = parse_csv(text, self.delimiter);
55 let (content, annotations) = format_table(&rows, has_header);
56
57 let mut metadata = CodecMetadata::new(ContentType::new(self.content_type));
58 metadata.set("delimiter", String::from(self.delimiter as char));
59 metadata.set("has_header", has_header.to_string());
60
61 let line_ending = if text.contains("\r\n") { "crlf" } else { "lf" };
63 metadata.set("line_ending", line_ending);
64
65 Ok(DecodeResult {
66 content,
67 annotations,
68 metadata,
69 lossy: false,
70 readonly: false,
71 })
72 }
73
74 #[cfg_attr(coverage_nightly, coverage(off))]
75 fn encode(
76 &self,
77 content: &str,
78 metadata: &CodecMetadata,
79 ) -> Option<Result<Vec<u8>, CodecError>> {
80 let delimiter = metadata
81 .get("delimiter")
82 .and_then(|s| s.chars().next())
83 .unwrap_or(self.delimiter as char);
84
85 let line_ending = match metadata.get("line_ending") {
86 Some("crlf") => "\r\n",
87 _ => "\n",
88 };
89
90 Some(Ok(encode_csv(content, delimiter, line_ending)))
91 }
92}
93
94fn parse_csv(text: &str, delimiter: u8) -> (Vec<Vec<String>>, bool) {
99 let mut reader = csv::ReaderBuilder::new()
100 .delimiter(delimiter)
101 .has_headers(false)
102 .flexible(true)
103 .from_reader(text.as_bytes());
104
105 let rows: Vec<Vec<String>> = reader
106 .records()
107 .filter_map(Result::ok)
108 .map(|record| record.iter().map(String::from).collect())
109 .collect();
110
111 let has_header = detect_header(&rows);
112 (rows, has_header)
113}
114
/// Heuristically decides whether the first row of `rows` is a header.
///
/// The first row counts as a header when it contains strictly fewer fields
/// that parse as `f64` than the second row does. With fewer than two rows
/// there is nothing to compare against, so the answer is `false`.
#[cfg_attr(coverage_nightly, coverage(off))]
fn detect_header(rows: &[Vec<String>]) -> bool {
    /// Counts the fields of `row` that parse as a floating-point number.
    fn numeric_fields(row: &[String]) -> usize {
        row.iter()
            .filter(|field| field.parse::<f64>().is_ok())
            .count()
    }

    // The slice pattern covers the "at least two rows" guard and the row
    // lookups in one step, so no unreachable fallback branch is needed.
    match rows {
        [first, second, ..] => numeric_fields(first) < numeric_fields(second),
        _ => false,
    }
}
140
141#[cfg_attr(coverage_nightly, coverage(off))]
143fn format_table(rows: &[Vec<String>], has_header: bool) -> (String, Vec<Annotation>) {
144 if rows.is_empty() {
145 return (String::new(), Vec::new());
146 }
147
148 let col_count = rows.iter().map(Vec::len).max().unwrap_or(0);
150 let mut widths = vec![0_usize; col_count];
151
152 for row in rows {
153 for (i, field) in row.iter().enumerate() {
154 if i < col_count {
155 widths[i] = widths[i].max(field.len());
156 }
157 }
158 }
159
160 for w in &mut widths {
162 *w = (*w).max(3);
163 }
164
165 let mut output = String::with_capacity(rows.len() * col_count * 10);
166 let mut annotations = Vec::new();
167
168 let header_kind = AnnotationKind::new(CSV_HEADER_KIND);
169 let column_kind = AnnotationKind::new(CSV_COLUMN_KIND);
170
171 for (line_idx, row) in rows.iter().enumerate() {
172 for (col_idx, field) in row.iter().enumerate() {
174 if col_idx > 0 {
175 output.push_str(" "); }
177 let width = widths.get(col_idx).copied().unwrap_or(3);
178 let _ = write!(output, "{field:<width$}");
179 }
180 output.push('\n');
181
182 if line_idx == 0 && has_header {
184 annotations.push(Annotation {
185 kind: header_kind.clone(),
186 target: AnnotationTarget::Line(line_idx),
187 priority: 0,
188 payload: AnnotationPayload::None,
189 });
190 }
191
192 annotations.push(Annotation {
194 kind: column_kind.clone(),
195 target: AnnotationTarget::Line(line_idx),
196 priority: 0,
197 payload: AnnotationPayload::Number(row.len()),
198 });
199 }
200
201 (output, annotations)
202}
203
/// Converts aligned table text back into delimiter-separated bytes.
///
/// Every line of `content` is split with [`split_aligned_fields`]; each field
/// is trimmed, the fields are joined with `delimiter`, and the line is
/// terminated with `line_ending`. A field is wrapped in double quotes (with
/// embedded quotes doubled) when it contains the delimiter, a double quote,
/// `'\n'` or `'\r'`.
///
/// The delimiter is written in its UTF-8 encoding, so a non-ASCII delimiter
/// produces valid UTF-8 output rather than a single truncated byte, and it
/// matches the character the quoting check looks for.
#[cfg_attr(coverage_nightly, coverage(off))]
fn encode_csv(content: &str, delimiter: char, line_ending: &str) -> Vec<u8> {
    let mut result = Vec::with_capacity(content.len());

    // Encode the delimiter once; a `char` needs at most four UTF-8 bytes.
    let mut delimiter_buf = [0_u8; 4];
    let delimiter_len = delimiter.encode_utf8(&mut delimiter_buf).len();
    let delimiter_bytes = &delimiter_buf[..delimiter_len];

    for line in content.lines() {
        for (i, field) in split_aligned_fields(line).into_iter().enumerate() {
            if i > 0 {
                result.extend_from_slice(delimiter_bytes);
            }

            let field = field.trim();
            let needs_quoting = field.contains(delimiter) || field.contains(['"', '\n', '\r']);

            if needs_quoting {
                result.push(b'"');
                for byte in field.bytes() {
                    // A literal quote is escaped by doubling it.
                    if byte == b'"' {
                        result.push(b'"');
                    }
                    result.push(byte);
                }
                result.push(b'"');
            } else {
                result.extend_from_slice(field.as_bytes());
            }
        }

        result.extend_from_slice(line_ending.as_bytes());
    }

    result
}

/// Splits one line of aligned table text into its fields.
///
/// A run of two or more spaces is a field boundary; a single space stays
/// part of the field. Each returned slice has trailing whitespace removed.
/// If the line ends in a boundary, an empty trailing field is appended. An
/// empty line yields no fields.
#[cfg_attr(coverage_nightly, coverage(off))]
fn split_aligned_fields(line: &str) -> Vec<&str> {
    if line.is_empty() {
        return Vec::new();
    }

    let bytes = line.as_bytes();
    let mut fields = Vec::new();
    let mut start = 0;
    let mut i = 0;

    while i < bytes.len() {
        let at_boundary = i + 1 < bytes.len() && bytes[i] == b' ' && bytes[i + 1] == b' ';
        if at_boundary {
            // `start` and `i` always sit next to ASCII spaces (or at 0), so
            // both are valid char boundaries for slicing.
            fields.push(line[start..i].trim_end());

            // Swallow the whole run of spaces, however long it is.
            while i < bytes.len() && bytes[i] == b' ' {
                i += 1;
            }
            start = i;
        } else {
            i += 1;
        }
    }

    if start < bytes.len() {
        fields.push(line[start..].trim_end());
    } else if !fields.is_empty() {
        // The line ended in a separator: record the empty field after it.
        fields.push("");
    }

    fields
}
285
286#[cfg(test)]
287#[path = "codec_tests.rs"]
288mod tests;