1use std::fs::File;
25use std::marker::PhantomData;
26use std::os::raw::c_int;
27use std::path::Path;
28use std::str;
29
30use crate::ffi;
31use crate::types::Null;
32use crate::vtab::{
33 escape_double_quote, parse_boolean, read_only_module, Context, CreateVTab, IndexInfo, VTab,
34 VTabConfig, VTabConnection, VTabCursor, VTabKind, Values,
35};
36use crate::{Connection, Error, Result};
37
38pub fn load_module(conn: &Connection) -> Result<()> {
50 let aux: Option<()> = None;
51 conn.create_module("csv", read_only_module::<CsvTab>(), aux)
52}
53
/// State of one `csv` virtual table instance.
///
/// The struct is `#[repr(C)]` and `base` is its first field because
/// `CsvTabCursor::vtab` casts the `sqlite3_vtab` pointer stored in a cursor
/// back to a `CsvTab`.
#[repr(C)]
struct CsvTab {
    /// SQLite's base virtual-table structure; must remain the first field.
    base: ffi::sqlite3_vtab,
    /// Path of the CSV file, taken from the `filename` module argument.
    filename: String,
    /// Whether the reader treats the first record as a header row
    /// (`header` module argument).
    has_headers: bool,
    /// Field delimiter byte handed to the CSV reader (`delimiter` argument).
    delimiter: u8,
    /// Quote byte handed to the CSV reader (`quote` argument).
    quote: u8,
    /// Reader position recorded in `connect` right after the header row was
    /// read; cursors seek back to it in `filter`.
    offset_first_row: csv::Position,
}
67
68impl CsvTab {
69 fn reader(&self) -> Result<csv::Reader<File>, csv::Error> {
70 csv::ReaderBuilder::new()
71 .has_headers(self.has_headers)
72 .delimiter(self.delimiter)
73 .quote(self.quote)
74 .from_path(&self.filename)
75 }
76
77 fn parse_byte(arg: &str) -> Option<u8> {
78 if arg.len() == 1 {
79 arg.bytes().next()
80 } else {
81 None
82 }
83 }
84}
85
86unsafe impl<'vtab> VTab<'vtab> for CsvTab {
87 type Aux = ();
88 type Cursor = CsvTabCursor<'vtab>;
89
90 fn connect(
91 db: &mut VTabConnection,
92 _aux: Option<&()>,
93 args: &[&[u8]],
94 ) -> Result<(String, CsvTab)> {
95 if args.len() < 4 {
96 return Err(Error::ModuleError("no CSV file specified".to_owned()));
97 }
98
99 let mut vtab = CsvTab {
100 base: ffi::sqlite3_vtab::default(),
101 filename: "".to_owned(),
102 has_headers: false,
103 delimiter: b',',
104 quote: b'"',
105 offset_first_row: csv::Position::new(),
106 };
107 let mut schema = None;
108 let mut n_col = None;
109
110 let args = &args[3..];
111 for c_slice in args {
112 let (param, value) = super::parameter(c_slice)?;
113 match param {
114 "filename" => {
115 if !Path::new(value).exists() {
116 return Err(Error::ModuleError(format!(
117 "file '{}' does not exist",
118 value
119 )));
120 }
121 vtab.filename = value.to_owned();
122 }
123 "schema" => {
124 schema = Some(value.to_owned());
125 }
126 "columns" => {
127 if let Ok(n) = value.parse::<u16>() {
128 if n_col.is_some() {
129 return Err(Error::ModuleError(
130 "more than one 'columns' parameter".to_owned(),
131 ));
132 } else if n == 0 {
133 return Err(Error::ModuleError(
134 "must have at least one column".to_owned(),
135 ));
136 }
137 n_col = Some(n);
138 } else {
139 return Err(Error::ModuleError(format!(
140 "unrecognized argument to 'columns': {}",
141 value
142 )));
143 }
144 }
145 "header" => {
146 if let Some(b) = parse_boolean(value) {
147 vtab.has_headers = b;
148 } else {
149 return Err(Error::ModuleError(format!(
150 "unrecognized argument to 'header': {}",
151 value
152 )));
153 }
154 }
155 "delimiter" => {
156 if let Some(b) = CsvTab::parse_byte(value) {
157 vtab.delimiter = b;
158 } else {
159 return Err(Error::ModuleError(format!(
160 "unrecognized argument to 'delimiter': {}",
161 value
162 )));
163 }
164 }
165 "quote" => {
166 if let Some(b) = CsvTab::parse_byte(value) {
167 if b == b'0' {
168 vtab.quote = 0;
169 } else {
170 vtab.quote = b;
171 }
172 } else {
173 return Err(Error::ModuleError(format!(
174 "unrecognized argument to 'quote': {}",
175 value
176 )));
177 }
178 }
179 _ => {
180 return Err(Error::ModuleError(format!(
181 "unrecognized parameter '{}'",
182 param
183 )));
184 }
185 }
186 }
187
188 if vtab.filename.is_empty() {
189 return Err(Error::ModuleError("no CSV file specified".to_owned()));
190 }
191
192 let mut cols: Vec<String> = Vec::new();
193 if vtab.has_headers || (n_col.is_none() && schema.is_none()) {
194 let mut reader = vtab.reader()?;
195 if vtab.has_headers {
196 {
197 let headers = reader.headers()?;
198 if n_col.is_none() && schema.is_none() {
200 cols = headers
201 .into_iter()
202 .map(|header| escape_double_quote(header).into_owned())
203 .collect();
204 }
205 }
206 vtab.offset_first_row = reader.position().clone();
207 } else {
208 let mut record = csv::ByteRecord::new();
209 if reader.read_byte_record(&mut record)? {
210 for (i, _) in record.iter().enumerate() {
211 cols.push(format!("c{i}"));
212 }
213 }
214 }
215 } else if let Some(n_col) = n_col {
216 for i in 0..n_col {
217 cols.push(format!("c{i}"));
218 }
219 }
220
221 if cols.is_empty() && schema.is_none() {
222 return Err(Error::ModuleError("no column specified".to_owned()));
223 }
224
225 if schema.is_none() {
226 let mut sql = String::from("CREATE TABLE x(");
227 for (i, col) in cols.iter().enumerate() {
228 sql.push('"');
229 sql.push_str(col);
230 sql.push_str("\" TEXT");
231 if i == cols.len() - 1 {
232 sql.push_str(");");
233 } else {
234 sql.push_str(", ");
235 }
236 }
237 schema = Some(sql);
238 }
239 db.config(VTabConfig::DirectOnly)?;
240 Ok((schema.unwrap(), vtab))
241 }
242
243 fn best_index(&self, info: &mut IndexInfo) -> Result<()> {
245 info.set_estimated_cost(1_000_000.);
246 Ok(())
247 }
248
249 fn open(&mut self) -> Result<CsvTabCursor<'_>> {
250 Ok(CsvTabCursor::new(self.reader()?))
251 }
252}
253
/// Implements `CreateVTab` for `CsvTab`, declaring it a `VTabKind::Default`
/// table.
impl CreateVTab<'_> for CsvTab {
    const KIND: VTabKind = VTabKind::Default;
}
257
/// Cursor that walks the records of the CSV file behind a `CsvTab`.
///
/// `#[repr(C)]` with `base` as the first field.
#[repr(C)]
struct CsvTabCursor<'vtab> {
    /// SQLite's base cursor structure; its `pVtab` pointer is used by
    /// `vtab()` to reach the owning table.
    base: ffi::sqlite3_vtab_cursor,
    /// CSV reader this cursor pulls records from.
    reader: csv::Reader<File>,
    /// Counter reset by `filter` and incremented by `next`; reported as the
    /// rowid.
    row_number: usize,
    /// Most recently read record; `column` serves values from it.
    cols: csv::StringRecord,
    /// Set once the reader is done or a read yields no record.
    eof: bool,
    /// Ties the cursor's lifetime to the table without storing a reference.
    phantom: PhantomData<&'vtab CsvTab>,
}
272
273impl CsvTabCursor<'_> {
274 fn new<'vtab>(reader: csv::Reader<File>) -> CsvTabCursor<'vtab> {
275 CsvTabCursor {
276 base: ffi::sqlite3_vtab_cursor::default(),
277 reader,
278 row_number: 0,
279 cols: csv::StringRecord::new(),
280 eof: false,
281 phantom: PhantomData,
282 }
283 }
284
285 fn vtab(&self) -> &CsvTab {
287 unsafe { &*(self.base.pVtab as *const CsvTab) }
288 }
289}
290
291unsafe impl VTabCursor for CsvTabCursor<'_> {
292 fn filter(
295 &mut self,
296 _idx_num: c_int,
297 _idx_str: Option<&str>,
298 _args: &Values<'_>,
299 ) -> Result<()> {
300 {
301 let offset_first_row = self.vtab().offset_first_row.clone();
302 self.reader.seek(offset_first_row)?;
303 }
304 self.row_number = 0;
305 self.next()
306 }
307
308 fn next(&mut self) -> Result<()> {
309 {
310 self.eof = self.reader.is_done();
311 if self.eof {
312 return Ok(());
313 }
314
315 self.eof = !self.reader.read_record(&mut self.cols)?;
316 }
317
318 self.row_number += 1;
319 Ok(())
320 }
321
322 fn eof(&self) -> bool {
323 self.eof
324 }
325
326 fn column(&self, ctx: &mut Context, col: c_int) -> Result<()> {
327 if col < 0 || col as usize >= self.cols.len() {
328 return Err(Error::ModuleError(format!(
329 "column index out of bounds: {}",
330 col
331 )));
332 }
333 if self.cols.is_empty() {
334 return ctx.set_result(&Null);
335 }
336 ctx.set_result(&self.cols[col as usize].to_owned())
338 }
339
340 fn rowid(&self) -> Result<i64> {
341 Ok(self.row_number as i64)
342 }
343}
344
345impl From<csv::Error> for Error {
346 #[cold]
347 fn from(err: csv::Error) -> Error {
348 Error::ModuleError(err.to_string())
349 }
350}
351
#[cfg(test)]
mod test {
    use crate::vtab::csvtab;
    use crate::{Connection, Result};
    use fallible_iterator::FallibleIterator;

    /// Opens an in-memory database, loads the csv module and creates the
    /// `vtab` virtual table over `test.csv` with headers enabled.
    fn open_with_vtab() -> Result<Connection> {
        let db = Connection::open_in_memory()?;
        csvtab::load_module(&db)?;
        db.execute_batch("CREATE VIRTUAL TABLE vtab USING csv(filename='test.csv', header=yes)")?;
        Ok(db)
    }

    /// Checks the column names exposed by the table and that the rowids of
    /// all rows sum to 15.
    #[test]
    fn test_csv_module() -> Result<()> {
        let db = open_with_vtab()?;

        {
            let mut stmt = db.prepare("SELECT rowid, * FROM vtab")?;
            assert_eq!(vec!["rowid", "colA", "colB", "colC"], stmt.column_names());

            let ids: Result<Vec<i32>> = stmt
                .query([])?
                .map(|row| row.get::<_, i32>(0))
                .collect();
            let total: i32 = ids?.iter().sum();
            assert_eq!(total, 15);
        }
        // The statement is dropped above, before the table is dropped.
        db.execute_batch("DROP TABLE vtab")
    }

    /// Joins the table with itself so that two cursors are open at once and
    /// checks the rowid of the first result row.
    #[test]
    fn test_csv_cursor() -> Result<()> {
        let db = open_with_vtab()?;

        {
            let mut stmt = db.prepare(
                "SELECT v1.rowid, v1.* FROM vtab v1 NATURAL JOIN vtab v2 WHERE \
                 v1.rowid < v2.rowid",
            )?;

            let mut rows = stmt.query([])?;
            let first = rows.next()?.unwrap();
            assert_eq!(first.get_unwrap::<_, i32>(0), 2);
        }
        db.execute_batch("DROP TABLE vtab")
    }
}