1#[allow(unused_imports)]
13use base64::{Engine as _, engine::general_purpose};
14
15pub use crate::types::FileData;
16use runtara_agent_macro::{CapabilityInput, capability};
17#[allow(unused_imports)]
18use serde::{Deserialize, Serialize};
19#[allow(unused_imports)]
20use serde_json::Value;
21#[allow(unused_imports)]
22use std::collections::HashMap;
23
/// CSV payload accepted by the CSV capabilities, in one of three shapes.
///
/// The enum is `#[serde(untagged)]`: there is no discriminator field, and serde tries the
/// variants in declaration order, keeping the first one that deserializes.
#[derive(Debug, Deserialize)]
#[serde(untagged)]
pub enum CsvDataInput {
    /// Raw bytes; `to_bytes` returns a copy of them unchanged.
    Bytes(Vec<u8>),
    /// A `FileData` value; `to_bytes` obtains the bytes through `FileData::decode`.
    File(FileData),
    /// A string that `to_bytes` decodes with the standard base64 engine.
    Base64String(String),
}
39
40impl CsvDataInput {
41 pub fn to_bytes(&self) -> Result<Vec<u8>, String> {
43 match self {
44 CsvDataInput::Bytes(b) => Ok(b.clone()),
45 CsvDataInput::File(f) => f.decode(),
46 CsvDataInput::Base64String(s) => general_purpose::STANDARD
47 .decode(s)
48 .map_err(|e| format!("Failed to decode base64 CSV content: {}", e)),
49 }
50 }
51}
52
/// Input for the `from_csv` capability: the CSV payload plus the parsing options.
///
/// Every option except `data` has a serde default, so a payload containing only `data`
/// deserializes successfully.
#[derive(Debug, Deserialize, CapabilityInput)]
#[capability_input(display_name = "Parse CSV Input")]
pub struct FromCsvInput {
    /// The CSV payload (raw bytes, file data object, or base64 string).
    #[field(
        display_name = "CSV Data",
        description = "Raw CSV data as bytes, base64 encoded string, or file data object"
    )]
    pub data: CsvDataInput,

    /// Label of the character encoding used to decode `data`; defaults to `"UTF-8"`.
    #[field(
        display_name = "Encoding",
        description = "Character encoding of the CSV data",
        example = "UTF-8",
        default = "UTF-8"
    )]
    #[serde(default = "default_encoding")]
    pub encoding: String,

    /// Column delimiter; defaults to `,`.
    #[field(
        display_name = "Delimiter",
        description = "Column delimiter character",
        example = ",",
        default = ","
    )]
    #[serde(default = "default_delimiter")]
    pub delimiter: char,

    /// Quote character; defaults to `"`.
    #[field(
        display_name = "Quote Character",
        description = "Character used to quote fields containing delimiters",
        example = "\"",
        default = "\""
    )]
    #[serde(default = "default_quote_char")]
    pub quote_char: char,

    /// Optional escape character; `None` when omitted.
    #[field(
        display_name = "Escape Character",
        description = "Character used to escape special characters (optional)"
    )]
    #[serde(default)]
    pub escape_char: Option<char>,

    /// Whether the first row is a header row; defaults to `true`.
    #[field(
        display_name = "Use Header",
        description = "Whether the first row contains column headers",
        example = "true",
        default = "true"
    )]
    #[serde(default = "default_true")]
    pub use_header: bool,

    /// Whether empty records are dropped from the result; defaults to `true`.
    #[field(
        display_name = "Skip Empty Lines",
        description = "Whether to skip empty lines in the CSV",
        example = "true",
        default = "true"
    )]
    #[serde(default = "default_true")]
    pub skip_empty_lines: bool,

    /// Whether surrounding whitespace is trimmed; defaults to `false`.
    #[field(
        display_name = "Trim Whitespace",
        description = "Whether to trim whitespace from field values",
        example = "false",
        default = "false"
    )]
    #[serde(default)]
    pub trim_whitespace: bool,
}
131
/// Input for the `to_csv` capability: the JSON value to serialize plus the writer options.
///
/// Every option except `value` has a serde default.
#[derive(Debug, Deserialize, CapabilityInput)]
#[capability_input(display_name = "Generate CSV Input")]
pub struct ToCsvInput {
    /// JSON data to render: an array of objects, an array of arrays, or a single value.
    #[field(
        display_name = "Value",
        description = "Data to convert to CSV (array of objects, array of arrays, or single object)",
        example = r#"[{"name": "Alice", "age": 30}]"#
    )]
    pub value: Value,

    /// Requested output encoding label; defaults to `"UTF-8"`.
    ///
    /// NOTE(review): `to_csv` in this file never reads this field — confirm whether
    /// non-UTF-8 output is expected to be supported.
    #[field(
        display_name = "Encoding",
        description = "Character encoding for the output CSV",
        example = "UTF-8",
        default = "UTF-8"
    )]
    #[serde(default = "default_encoding")]
    pub encoding: String,

    /// Column delimiter; defaults to `,`.
    #[field(
        display_name = "Delimiter",
        description = "Column delimiter character",
        example = ",",
        default = ","
    )]
    #[serde(default = "default_delimiter")]
    pub delimiter: char,

    /// Quote character; defaults to `"`.
    #[field(
        display_name = "Quote Character",
        description = "Character used to quote fields containing delimiters",
        example = "\"",
        default = "\""
    )]
    #[serde(default = "default_quote_char")]
    pub quote_char: char,

    /// Optional escape character; `None` when omitted.
    #[field(
        display_name = "Escape Character",
        description = "Character used to escape special characters (optional)"
    )]
    #[serde(default)]
    pub escape_char: Option<char>,

    /// Whether a header row is written before the data rows; defaults to `true`.
    #[field(
        display_name = "Use Header",
        description = "Whether to include a header row in the output",
        example = "true",
        default = "true"
    )]
    #[serde(default = "default_true")]
    pub use_header: bool,
}
191
/// Input for the `get_header` capability: the CSV payload plus the parsing options.
///
/// The fields mirror `FromCsvInput`; every option except `data` has a serde default.
#[derive(Debug, Deserialize, CapabilityInput)]
#[capability_input(display_name = "Get CSV Header Input")]
pub struct GetHeaderInput {
    /// The CSV payload (raw bytes, file data object, or base64 string).
    #[field(
        display_name = "CSV Data",
        description = "Raw CSV data as bytes, base64 encoded string, or file data object"
    )]
    pub data: CsvDataInput,

    /// Label of the character encoding used to decode `data`; defaults to `"UTF-8"`.
    #[field(
        display_name = "Encoding",
        description = "Character encoding of the CSV data",
        example = "UTF-8",
        default = "UTF-8"
    )]
    #[serde(default = "default_encoding")]
    pub encoding: String,

    /// Column delimiter; defaults to `,`.
    #[field(
        display_name = "Delimiter",
        description = "Column delimiter character",
        example = ",",
        default = ","
    )]
    #[serde(default = "default_delimiter")]
    pub delimiter: char,

    /// Quote character; defaults to `"`.
    #[field(
        display_name = "Quote Character",
        description = "Character used to quote fields containing delimiters",
        example = "\"",
        default = "\""
    )]
    #[serde(default = "default_quote_char")]
    pub quote_char: char,

    /// Optional escape character; `None` when omitted.
    #[field(
        display_name = "Escape Character",
        description = "Character used to escape special characters (optional)"
    )]
    #[serde(default)]
    pub escape_char: Option<char>,

    /// Whether the first row is a header row; defaults to `true`.
    #[field(
        display_name = "Use Header",
        description = "Whether the first row contains column headers",
        example = "true",
        default = "true"
    )]
    #[serde(default = "default_true")]
    pub use_header: bool,

    /// Empty-line handling flag; defaults to `true`.
    #[field(
        display_name = "Skip Empty Lines",
        description = "Whether to skip empty lines in the CSV",
        example = "true",
        default = "true"
    )]
    #[serde(default = "default_true")]
    pub skip_empty_lines: bool,

    /// Whether surrounding whitespace is trimmed; defaults to `false`.
    #[field(
        display_name = "Trim Whitespace",
        description = "Whether to trim whitespace from field values",
        example = "false",
        default = "false"
    )]
    #[serde(default)]
    pub trim_whitespace: bool,
}
270
/// A column name paired with a type label, serializable via serde.
///
/// NOTE(review): nothing in the visible part of this file constructs `HeaderInfo`
/// (`get_header` returns a `HashMap` instead) — confirm it is still used elsewhere.
#[derive(Debug, Serialize)]
pub struct HeaderInfo {
    /// Column name.
    pub name: String,
    /// Type label for the column; serialized under the key `type`.
    #[serde(rename = "type")]
    pub data_type: String,
}
277
/// Serde default for the `encoding` fields: the label `"UTF-8"`.
fn default_encoding() -> String {
    String::from("UTF-8")
}
282
/// Serde default for the `delimiter` fields: a comma.
fn default_delimiter() -> char {
    const COMMA: char = ',';
    COMMA
}
286
/// Serde default for the `quote_char` fields: a double quote.
fn default_quote_char() -> char {
    const DOUBLE_QUOTE: char = '"';
    DOUBLE_QUOTE
}
290
/// Serde default for boolean options that are enabled unless the caller says otherwise.
///
/// Needed because `#[serde(default)]` on a `bool` would yield `false`.
fn default_true() -> bool {
    true
}
294
295#[capability(
303 module = "csv",
304 display_name = "Parse CSV",
305 description = "Parse CSV bytes into a JSON array of objects or arrays"
306)]
307pub fn from_csv(input: FromCsvInput) -> Result<Vec<Value>, String> {
308 let data = input.data.to_bytes()?;
310 let csv_string = decode_bytes(&data, &input.encoding)?;
311
312 let mut reader_builder = csv::ReaderBuilder::new();
314 reader_builder
315 .delimiter(input.delimiter as u8)
316 .quote(input.quote_char as u8)
317 .has_headers(input.use_header)
318 .trim(if input.trim_whitespace {
319 csv::Trim::All
320 } else {
321 csv::Trim::None
322 });
323
324 if let Some(escape) = input.escape_char {
325 reader_builder.escape(Some(escape as u8));
326 }
327
328 let mut reader = reader_builder.from_reader(csv_string.as_bytes());
329 let mut result = Vec::new();
330
331 if input.use_header {
332 let headers = reader
334 .headers()
335 .map_err(|e| format!("Failed to read CSV headers: {}", e))?
336 .clone();
337
338 for record_result in reader.records() {
339 let record = record_result.map_err(|e| format!("Failed to read CSV record: {}", e))?;
340
341 if input.skip_empty_lines && is_empty_record(&record) {
343 continue;
344 }
345
346 let mut row_map = serde_json::Map::new();
347 for (i, field) in record.iter().enumerate() {
348 let column_name = headers
349 .get(i)
350 .map(|s| s.to_string())
351 .unwrap_or_else(|| i.to_string());
352
353 if !column_name.is_empty() {
354 row_map.insert(column_name, Value::String(field.to_string()));
355 }
356 }
357
358 result.push(Value::Object(row_map));
359 }
360 } else {
361 for record_result in reader.records() {
363 let record = record_result.map_err(|e| format!("Failed to read CSV record: {}", e))?;
364
365 if input.skip_empty_lines && is_empty_record(&record) {
367 continue;
368 }
369
370 let row: Vec<Value> = record
371 .iter()
372 .map(|field| Value::String(field.to_string()))
373 .collect();
374
375 result.push(Value::Array(row));
376 }
377 }
378
379 Ok(result)
380}
381
382#[capability(
384 module = "csv",
385 display_name = "Generate CSV",
386 description = "Convert JSON data to CSV bytes"
387)]
388pub fn to_csv(input: ToCsvInput) -> Result<Vec<u8>, String> {
389 let mut writer_builder = csv::WriterBuilder::new();
390 writer_builder
391 .delimiter(input.delimiter as u8)
392 .quote(input.quote_char as u8);
393
394 if let Some(escape) = input.escape_char {
395 writer_builder.escape(escape as u8);
396 }
397
398 let mut output = Vec::new();
399
400 {
401 let mut writer = writer_builder.from_writer(&mut output);
402
403 match &input.value {
404 Value::Array(arr) => {
405 if !arr.is_empty() {
406 if input.use_header {
408 let headers = get_header_names(&arr[0]);
409 writer
410 .write_record(&headers)
411 .map_err(|e| format!("Failed to write CSV header: {}", e))?;
412 }
413
414 for item in arr {
416 let row = value_to_csv_row(item);
417 writer
418 .write_record(&row)
419 .map_err(|e| format!("Failed to write CSV row: {}", e))?;
420 }
421 }
422 }
423 single_value => {
424 if input.use_header {
426 let headers = get_header_names(single_value);
427 writer
428 .write_record(&headers)
429 .map_err(|e| format!("Failed to write CSV header: {}", e))?;
430 }
431
432 let row = value_to_csv_row(single_value);
433 writer
434 .write_record(&row)
435 .map_err(|e| format!("Failed to write CSV row: {}", e))?;
436 }
437 }
438
439 writer
440 .flush()
441 .map_err(|e| format!("Failed to flush CSV writer: {}", e))?;
442 }
443 Ok(output)
446}
447
448#[capability(
450 module = "csv",
451 display_name = "Get CSV Header",
452 description = "Extract CSV headers with type inference from the first data row"
453)]
454pub fn get_header(input: GetHeaderInput) -> Result<HashMap<String, String>, String> {
455 let data = input.data.to_bytes()?;
457 let csv_string = decode_bytes(&data, &input.encoding)?;
458
459 let mut reader_builder = csv::ReaderBuilder::new();
461 reader_builder
462 .delimiter(input.delimiter as u8)
463 .quote(input.quote_char as u8)
464 .has_headers(input.use_header)
465 .trim(if input.trim_whitespace {
466 csv::Trim::All
467 } else {
468 csv::Trim::None
469 });
470
471 if let Some(escape) = input.escape_char {
472 reader_builder.escape(Some(escape as u8));
473 }
474
475 let mut reader = reader_builder.from_reader(csv_string.as_bytes());
476 let mut result = HashMap::new();
477
478 let headers: Vec<String> = if input.use_header {
480 reader
481 .headers()
482 .map_err(|e| format!("Failed to read CSV headers: {}", e))?
483 .iter()
484 .enumerate()
485 .map(|(i, h)| {
486 if h.is_empty() {
487 i.to_string()
488 } else {
489 h.to_string()
490 }
491 })
492 .collect()
493 } else {
494 let first_record = reader
496 .records()
497 .next()
498 .ok_or("CSV file is empty")?
499 .map_err(|e| format!("Failed to read first record: {}", e))?;
500
501 (0..first_record.len())
502 .map(|i| format!("Column {}", i + 1))
503 .collect()
504 };
505
506 let first_data_row = reader.records().next();
508
509 if let Some(record_result) = first_data_row {
510 let record = record_result.map_err(|e| format!("Failed to read data row: {}", e))?;
511
512 for (i, header) in headers.iter().enumerate() {
513 let inferred_type = if let Some(value) = record.get(i) {
514 infer_type(value)
515 } else {
516 "String".to_string()
517 };
518
519 result.insert(header.clone(), inferred_type);
520 }
521 } else {
522 for header in headers {
524 result.insert(header, "String".to_string());
525 }
526 }
527
528 Ok(result)
529}
530
531fn decode_bytes(data: &[u8], encoding: &str) -> Result<String, String> {
537 match encoding.to_uppercase().as_str() {
538 "UTF-8" | "UTF8" => {
539 String::from_utf8(data.to_vec()).map_err(|e| format!("Failed to decode UTF-8: {}", e))
540 }
541 "LATIN-1" | "LATIN1" | "ISO-8859-1" | "ISO88591" | "WINDOWS-1252" | "CP1252" => {
542 let (decoded, _, had_errors) = encoding_rs::WINDOWS_1252.decode(data);
544 if had_errors {
545 Ok(decoded.into_owned())
547 } else {
548 Ok(decoded.into_owned())
549 }
550 }
551 _ => {
552 if let Some(enc) = encoding_rs::Encoding::for_label(encoding.as_bytes()) {
554 let (decoded, _, _) = enc.decode(data);
555 Ok(decoded.into_owned())
556 } else {
557 Ok(String::from_utf8_lossy(data).into_owned())
559 }
560 }
561 }
562}
563
564fn is_empty_record(record: &csv::StringRecord) -> bool {
566 record.is_empty() || (record.len() == 1 && record.get(0).is_some_and(|s| s.trim().is_empty()))
567}
568
569fn get_header_names(value: &Value) -> Vec<String> {
571 match value {
572 Value::Object(map) => map.keys().map(|k| k.to_string()).collect(),
573 Value::Array(arr) => (1..=arr.len()).map(|i| format!("Column {}", i)).collect(),
574 _ => vec!["value".to_string()],
575 }
576}
577
578fn value_to_csv_row(value: &Value) -> Vec<String> {
580 match value {
581 Value::Object(map) => map.values().map(value_to_string).collect(),
582 Value::Array(arr) => arr.iter().map(value_to_string).collect(),
583 _ => vec![value_to_string(value)],
584 }
585}
586
587fn value_to_string(value: &Value) -> String {
589 match value {
590 Value::Null => String::new(),
591 Value::Bool(b) => b.to_string(),
592 Value::Number(n) => n.to_string(),
593 Value::String(s) => s.clone(),
594 Value::Array(_) | Value::Object(_) => {
595 serde_json::to_string(value).unwrap_or_else(|_| String::new())
597 }
598 }
599}
600
601fn infer_type(value: &str) -> String {
603 if let Ok(json_value) = serde_json::from_str::<Value>(value) {
605 match json_value {
606 Value::Bool(_) => return "Boolean".to_string(),
607 Value::Number(n) => {
608 if n.is_i64() {
609 return "Integer".to_string();
610 } else if n.is_f64() {
611 return "Double".to_string();
612 }
613 }
614 _ => {}
615 }
616 }
617
618 "String".to_string()
619}
620
#[cfg(test)]
mod tests {
    use super::*;
    use serde_json::json;

    /// Builds a `FromCsvInput` over raw bytes with UTF-8 encoding, `"` quoting, no escape
    /// character, empty-line skipping on and trimming off.
    fn parse_input(csv: &[u8], delimiter: char, use_header: bool) -> FromCsvInput {
        FromCsvInput {
            data: CsvDataInput::Bytes(csv.to_vec()),
            encoding: "UTF-8".to_string(),
            delimiter,
            quote_char: '"',
            escape_char: None,
            use_header,
            skip_empty_lines: true,
            trim_whitespace: false,
        }
    }

    /// Runs `to_csv` with UTF-8 encoding, `"` quoting and no escape character, returning
    /// the output as text.
    fn generate_csv(value: Value, delimiter: char, use_header: bool) -> String {
        let input = ToCsvInput {
            value,
            encoding: "UTF-8".to_string(),
            delimiter,
            quote_char: '"',
            escape_char: None,
            use_header,
        };
        let bytes = to_csv(input).unwrap();
        String::from_utf8(bytes).unwrap()
    }

    /// Runs `get_header` over raw comma-separated bytes with the same fixed options as
    /// `parse_input`.
    fn header_types(csv: &[u8], use_header: bool) -> HashMap<String, String> {
        let input = GetHeaderInput {
            data: CsvDataInput::Bytes(csv.to_vec()),
            encoding: "UTF-8".to_string(),
            delimiter: ',',
            quote_char: '"',
            escape_char: None,
            use_header,
            skip_empty_lines: true,
            trim_whitespace: false,
        };
        get_header(input).unwrap()
    }

    #[test]
    fn test_from_csv_with_headers() {
        let source = b"name,age,active\nAlice,30,true\nBob,25,false";
        let rows = from_csv(parse_input(source, ',', true)).unwrap();
        assert_eq!(rows.len(), 2);

        let (alice, bob) = (&rows[0], &rows[1]);
        assert_eq!(alice["name"], "Alice");
        assert_eq!(alice["age"], "30");
        assert_eq!(alice["active"], "true");

        assert_eq!(bob["name"], "Bob");
        assert_eq!(bob["age"], "25");
    }

    #[test]
    fn test_from_csv_without_headers() {
        let rows = from_csv(parse_input(b"Alice,30\nBob,25", ',', false)).unwrap();
        assert_eq!(rows.len(), 2);

        let Value::Array(first_row) = &rows[0] else {
            panic!("Expected array");
        };
        assert_eq!(first_row[0], "Alice");
        assert_eq!(first_row[1], "30");
    }

    #[test]
    fn test_from_csv_custom_delimiter() {
        let rows = from_csv(parse_input(b"name;age\nAlice;30\nBob;25", ';', true)).unwrap();
        assert_eq!(rows.len(), 2);
        assert_eq!(rows[0]["name"], "Alice");
    }

    #[test]
    fn test_from_csv_skip_empty_lines() {
        let source = b"name,age\n\nAlice,30\n\nBob,25\n";
        let rows = from_csv(parse_input(source, ',', true)).unwrap();
        assert_eq!(rows.len(), 2);
    }

    #[test]
    fn test_from_csv_base64_string() {
        let encoded = base64::engine::general_purpose::STANDARD.encode(b"name,age\nAlice,30");
        let input = FromCsvInput {
            data: CsvDataInput::Base64String(encoded),
            ..parse_input(b"", ',', true)
        };

        let rows = from_csv(input).unwrap();
        assert_eq!(rows.len(), 1);
        assert_eq!(rows[0]["name"], "Alice");
    }

    #[test]
    fn test_to_csv_array_of_objects() {
        let people = json!([
            {"name": "Alice", "age": 30},
            {"name": "Bob", "age": 25}
        ]);

        let rendered = generate_csv(people, ',', true);

        for expected in ["name", "age", "Alice", "30"] {
            assert!(rendered.contains(expected));
        }
    }

    #[test]
    fn test_to_csv_without_header() {
        let people = json!([
            {"name": "Alice", "age": 30}
        ]);

        let rendered = generate_csv(people, ',', false);

        assert!(!rendered.starts_with("name"));
        assert!(rendered.contains("Alice"));
    }

    #[test]
    fn test_to_csv_custom_delimiter() {
        let rendered = generate_csv(json!([{"name": "Alice", "age": 30}]), ';', true);

        assert!(rendered.contains(';'));
    }

    #[test]
    fn test_get_header_with_type_inference() {
        let types = header_types(b"name,age,active\nAlice,30,true", true);

        assert_eq!(types.get("name").map(String::as_str), Some("String"));
        assert_eq!(types.get("age").map(String::as_str), Some("Integer"));
        assert_eq!(types.get("active").map(String::as_str), Some("Boolean"));
    }

    #[test]
    fn test_get_header_without_headers() {
        let types = header_types(b"Alice,30,true\nBob,25,false", false);

        for column in ["Column 1", "Column 2", "Column 3"] {
            assert!(types.contains_key(column));
        }
    }

    #[test]
    fn test_infer_type() {
        let cases = [
            ("true", "Boolean"),
            ("false", "Boolean"),
            ("42", "Integer"),
            ("3.14", "Double"),
            ("hello", "String"),
            ("", "String"),
        ];
        for (cell, expected) in cases {
            assert_eq!(infer_type(cell), expected);
        }
    }
}