synaptic_loaders/
csv_loader.rs1use std::collections::HashMap;
2
3use crate::Document;
4use async_trait::async_trait;
5use serde_json::Value;
6use synaptic_core::SynapticError;
7
8use crate::Loader;
9
/// Loader that turns in-memory CSV text into documents, one per data row.
///
/// Configure it with [`CsvLoader::new`] and the `with_*` builder methods.
#[derive(Debug, Clone)]
pub struct CsvLoader {
    /// Raw CSV text to parse.
    data: String,
    /// Header name of the column whose cell becomes the document content.
    /// When `None`, all cells of the row are joined with a single space.
    content_column: Option<String>,
    /// Header name of the column whose cell becomes the document id.
    /// When `None`, the id is `row-{i}` with `i` the zero-based row index.
    id_column: Option<String>,
}
20
21impl CsvLoader {
22 pub fn new(data: impl Into<String>) -> Self {
23 Self {
24 data: data.into(),
25 content_column: None,
26 id_column: None,
27 }
28 }
29
30 pub fn with_content_column(mut self, column: impl Into<String>) -> Self {
31 self.content_column = Some(column.into());
32 self
33 }
34
35 pub fn with_id_column(mut self, column: impl Into<String>) -> Self {
36 self.id_column = Some(column.into());
37 self
38 }
39}
40
41#[async_trait]
42impl Loader for CsvLoader {
43 async fn load(&self) -> Result<Vec<Document>, SynapticError> {
44 let mut reader = csv::Reader::from_reader(self.data.as_bytes());
45 let headers = reader
46 .headers()
47 .map_err(|e| SynapticError::Loader(format!("CSV header error: {e}")))?
48 .clone();
49
50 let mut docs = Vec::new();
51
52 for (i, result) in reader.records().enumerate() {
53 let record =
54 result.map_err(|e| SynapticError::Loader(format!("CSV row {i} error: {e}")))?;
55
56 let id = if let Some(id_col) = &self.id_column {
57 let idx = headers
58 .iter()
59 .position(|h| h == id_col.as_str())
60 .ok_or_else(|| {
61 SynapticError::Loader(format!("id column '{id_col}' not found"))
62 })?;
63 record.get(idx).unwrap_or("").to_string()
64 } else {
65 format!("row-{i}")
66 };
67
68 let content = if let Some(content_col) = &self.content_column {
69 let idx = headers
70 .iter()
71 .position(|h| h == content_col.as_str())
72 .ok_or_else(|| {
73 SynapticError::Loader(format!("content column '{content_col}' not found"))
74 })?;
75 record.get(idx).unwrap_or("").to_string()
76 } else {
77 record.iter().collect::<Vec<&str>>().join(" ")
79 };
80
81 let mut metadata = HashMap::new();
83 for (j, header) in headers.iter().enumerate() {
84 if let Some(value) = record.get(j) {
85 metadata.insert(header.to_string(), Value::String(value.to_string()));
86 }
87 }
88
89 docs.push(Document::with_metadata(id, content, metadata));
90 }
91
92 Ok(docs)
93 }
94}