1#![forbid(unsafe_code)]
2#![warn(rustdoc::broken_intra_doc_links)]
3
4use std::{
96 borrow::Cow,
97 collections::{BTreeMap, BTreeSet, HashSet, VecDeque},
98 io::Cursor,
99 path::Path,
100 sync::{Arc, Mutex, OnceLock},
101};
102
103use arrow::{
104 array::{
105 Array, BooleanArray, BooleanBuilder, Date32Array, Date64Array, Float64Array,
106 Float64Builder, Int64Array, Int64Builder, RecordBatch, StringArray, StringBuilder,
107 TimestampMicrosecondArray, TimestampMillisecondArray, TimestampNanosecondArray,
108 TimestampSecondArray,
109 },
110 datatypes::{DataType as ArrowDataType, Field, Schema, TimeUnit},
111};
112use csv::{ReaderBuilder, StringRecord, WriterBuilder};
113use dta::stata::{
114 dta::{
115 byte_order::ByteOrder, dta_reader::DtaReader, dta_writer::DtaWriter, header::Header,
116 release::Release, schema::Schema as StataSchema, value::Value as StataValue,
117 variable::Variable, variable_type::VariableType,
118 },
119 missing_value::MissingValue,
120 stata_double::StataDouble,
121 stata_long::StataLong,
122};
123use fp_columnar::{Column, ColumnError};
124use fp_frame::{DataFrame, FrameError, Series, ToDatetimeOptions, to_datetime_with_options};
125use fp_index::{Index, IndexError, IndexLabel, format_datetime_ns};
126use fp_types::{DType, NullKind, Scalar, Timedelta, Timestamp, cast_scalar_owned};
127#[cfg(feature = "hdf5")]
128use hdf5::File as Hdf5File;
129use orc_rust::{
130 ArrowReaderBuilder as OrcArrowReaderBuilder, ArrowWriterBuilder as OrcArrowWriterBuilder,
131};
132use parquet::arrow::{ArrowWriter, arrow_reader::ParquetRecordBatchReaderBuilder};
133use quick_xml::{Reader as XmlReader, XmlVersion, events::Event};
134use scraper::{ElementRef, Html, Selector};
135use thiserror::Error;
136
/// Errors produced by the readers and writers in this module.
///
/// Variants that carry a `String` hold a human-readable message for one
/// specific format. The `#[error(transparent)]` variants wrap an underlying
/// error unchanged and can be produced with `?` through their `#[from]`
/// conversions.
#[derive(Debug, Error)]
#[non_exhaustive]
pub enum IoError {
    /// The CSV input yielded an empty header record.
    #[error("csv input has no headers")]
    MissingHeaders,
    /// The requested index column (payload) is not among the CSV headers.
    #[error("csv index column '{0}' not found in headers")]
    MissingIndexColumn(String),
    /// A column name (payload) occurs more than once.
    #[error("duplicate column name '{0}'")]
    DuplicateColumnName(String),
    /// `usecols` named columns (payload) that are not present.
    #[error("usecols contains missing columns: {0:?}")]
    MissingUsecols(Vec<String>),
    /// `parse_dates` named columns (payload) that are not present.
    #[error("parse_dates contains missing columns: {0:?}")]
    MissingParseDateColumns(Vec<String>),
    /// JSON input or output did not have the expected shape.
    #[error("json format error: {0}")]
    JsonFormat(String),
    /// Parquet failure, carried as a message.
    #[error("parquet error: {0}")]
    Parquet(String),
    /// ORC failure, carried as a message.
    #[error("orc error: {0}")]
    Orc(String),
    /// HDF5 failure, carried as a message.
    #[error("hdf5 error: {0}")]
    Hdf5(String),
    /// Excel failure, carried as a message.
    #[error("excel error: {0}")]
    Excel(String),
    /// HTML failure, carried as a message.
    #[error("html error: {0}")]
    Html(String),
    /// XML failure, carried as a message.
    #[error("xml error: {0}")]
    Xml(String),
    /// Pickle failure, carried as a message.
    #[error("pickle error: {0}")]
    Pickle(String),
    /// Stata failure, carried as a message.
    #[error("stata error: {0}")]
    Stata(String),
    /// Fixed-width-format failure, e.g. invalid or non-inferable colspecs.
    #[error("fwf error: {0}")]
    Fwf(String),
    /// Failure reported by a deferred reader, carried as a message.
    #[error("deferred reader: {0}")]
    Deferred(String),
    /// Arrow IPC failure, carried as a message.
    #[error("arrow ipc error: {0}")]
    Arrow(String),
    /// SQL failure, carried as a message.
    #[error("sql error: {0}")]
    Sql(String),
    /// Error raised by the `csv` crate.
    #[error(transparent)]
    Csv(#[from] csv::Error),
    /// Error raised by `serde_json`.
    #[error(transparent)]
    Json(#[from] serde_json::Error),
    /// Underlying I/O error.
    #[error(transparent)]
    Io(#[from] std::io::Error),
    /// Bytes that were expected to be UTF-8 were not.
    #[error(transparent)]
    Utf8(#[from] std::string::FromUtf8Error),
    /// Error raised while building or converting a column.
    #[error(transparent)]
    Column(#[from] ColumnError),
    /// Error raised while building a frame.
    #[error(transparent)]
    Frame(#[from] FrameError),
    /// Error raised while building an index.
    #[error(transparent)]
    Index(#[from] IndexError),
}
191
/// Layout selector for JSON serialization and deserialization.
///
/// NOTE(review): the variant names match the values of pandas' `orient`
/// parameter; the exact layout each one produces is defined by the JSON
/// reader/writer, which is outside this section — confirm there.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum JsonOrient {
    /// Presumably one JSON object per row — TODO confirm.
    Records,
    /// Presumably keyed by column name first — TODO confirm.
    Columns,
    /// Presumably keyed by index label first — TODO confirm.
    Index,
    /// Presumably separate index / columns / data sections — TODO confirm.
    Split,
    /// Presumably bare row arrays without labels — TODO confirm.
    Values,
}
200
/// Policy for malformed CSV lines.
///
/// `Error` is the value used by `CsvReadOptions::default()` and by the
/// fixed-width reader. NOTE(review): how each policy is applied is implemented
/// by the option-aware CSV reader outside this section; the names presumably
/// mirror pandas' `on_bad_lines` — confirm there.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum CsvOnBadLines {
    /// Presumably fail the read on a malformed line — TODO confirm.
    Error,
    /// Presumably report the malformed line and continue — TODO confirm.
    Warn,
    /// Presumably drop the malformed line silently — TODO confirm.
    Skip,
}
207
/// Options accepted by the option-aware CSV reader.
///
/// The defaults quoted below are the values set by `Default for
/// CsvReadOptions`. NOTE(review): the reader that interprets these fields is
/// outside this section; the one-line descriptions follow the pandas
/// `read_csv` parameters of the same names and should be confirmed against
/// that implementation.
#[derive(Debug, Clone)]
pub struct CsvReadOptions {
    /// Field delimiter byte. Default `b','`.
    pub delimiter: u8,
    /// Whether the first record is a header row. Default `true`.
    pub has_headers: bool,
    /// Additional strings to treat as missing. Default empty.
    pub na_values: Vec<String>,
    /// Whether the built-in missing-value spellings stay active. Default `true`.
    pub keep_default_na: bool,
    /// Whether missing-value detection runs at all. Default `true`.
    pub na_filter: bool,
    /// Name of the column to use as the index. Default `None`.
    pub index_col: Option<String>,
    /// Subset of columns to keep. Default `None`.
    pub usecols: Option<Vec<String>>,
    /// Maximum number of data rows to read. Default `None`.
    pub nrows: Option<usize>,
    /// Number of leading lines to skip. Default `0`.
    pub skiprows: usize,
    /// Per-column dtype overrides keyed by column name. Default `None`.
    pub dtype: Option<std::collections::HashMap<String, DType>>,
    /// Columns to parse as datetimes. Default `None`.
    pub parse_dates: Option<Vec<String>>,
    /// Groups of columns to combine into one datetime column. Default `None`.
    pub parse_date_combinations: Option<Vec<Vec<String>>>,
    /// Like `parse_date_combinations`, with an explicit output name per group.
    /// Default `None`.
    pub parse_date_combinations_named: Option<Vec<(String, Vec<String>)>>,
    /// Byte that starts a comment. Default `None`.
    pub comment: Option<u8>,
    /// Extra spellings recognised as boolean true. Default empty.
    pub true_values: Vec<String>,
    /// Extra spellings recognised as boolean false. Default empty.
    pub false_values: Vec<String>,
    /// Decimal separator byte. Default `b'.'`.
    pub decimal: u8,
    /// Policy for malformed lines. Default `CsvOnBadLines::Error`.
    pub on_bad_lines: CsvOnBadLines,
    /// Thousands separator byte. Default `None`.
    pub thousands: Option<u8>,
    /// Number of trailing lines to skip. Default `0`.
    pub skipfooter: usize,
    /// Quote byte. Default `b'"'`.
    pub quotechar: u8,
    /// Escape byte. Default `None`.
    pub escapechar: Option<u8>,
    /// Whether a doubled quote inside a quoted field is an escaped quote.
    /// Default `true`.
    pub doublequote: bool,
    /// Custom record terminator byte. Default `None`.
    pub lineterminator: Option<u8>,
}
292
293impl Default for CsvReadOptions {
294 fn default() -> Self {
295 Self {
296 delimiter: b',',
297 has_headers: true,
298 na_values: Vec::new(),
299 keep_default_na: true,
300 na_filter: true,
301 index_col: None,
302 usecols: None,
303 nrows: None,
304 skiprows: 0,
305 dtype: None,
306 parse_dates: None,
307 parse_date_combinations: None,
308 parse_date_combinations_named: None,
309 comment: None,
310 true_values: Vec::new(),
311 false_values: Vec::new(),
312 decimal: b'.',
313 on_bad_lines: CsvOnBadLines::Error,
314 thousands: None,
315 quotechar: b'"',
316 escapechar: None,
317 doublequote: true,
318 skipfooter: 0,
319 lineterminator: None,
320 }
321 }
322}
323
324fn csv_read_options_match_default_shape(options: &CsvReadOptions, na_filter: bool) -> bool {
325 options.delimiter == b','
326 && options.has_headers
327 && options.na_values.is_empty()
328 && options.keep_default_na
329 && options.na_filter == na_filter
330 && options.index_col.is_none()
331 && options.usecols.is_none()
332 && options.nrows.is_none()
333 && options.skiprows == 0
334 && options.dtype.is_none()
335 && options.parse_dates.is_none()
336 && options.parse_date_combinations.is_none()
337 && options.parse_date_combinations_named.is_none()
338 && options.comment.is_none()
339 && options.true_values.is_empty()
340 && options.false_values.is_empty()
341 && options.decimal == b'.'
342 && options.on_bad_lines == CsvOnBadLines::Error
343 && options.thousands.is_none()
344 && options.skipfooter == 0
345 && options.quotechar == b'"'
346 && options.escapechar.is_none()
347 && options.doublequote
348 && options.lineterminator.is_none()
349}
350
351fn csv_read_options_match_default_fast_path(options: &CsvReadOptions) -> bool {
352 csv_read_options_match_default_shape(options, true)
353}
354
355fn csv_read_options_match_no_na_numeric_fast_path(options: &CsvReadOptions) -> bool {
356 csv_read_options_match_default_shape(options, false)
357}
358
/// Options for reading fixed-width-format text.
///
/// `colspecs` and `widths` decide where columns are cut. Every other field is
/// copied into the `CsvReadOptions` used for the CSV stage of the read, so its
/// meaning is whatever the CSV reader gives the field of the same name.
#[derive(Debug, Clone)]
pub struct FwfReadOptions {
    /// Explicit half-open `(start, end)` column ranges, counted in `char`s.
    /// Mutually exclusive with `widths`; an entry with `start > end` is
    /// rejected.
    pub colspecs: Option<Vec<(usize, usize)>>,
    /// Consecutive column widths; converted to adjacent colspecs starting at
    /// position 0. Mutually exclusive with `colspecs`.
    pub widths: Option<Vec<usize>>,
    /// Forwarded to the CSV stage. Default `true`.
    pub has_headers: bool,
    /// Forwarded to the CSV stage. Default empty.
    pub na_values: Vec<String>,
    /// Forwarded to the CSV stage. Default `true`.
    pub keep_default_na: bool,
    /// Forwarded to the CSV stage. Default `true`.
    pub na_filter: bool,
    /// Forwarded to the CSV stage. Default `None`.
    pub index_col: Option<String>,
    /// Forwarded to the CSV stage. Default `None`.
    pub usecols: Option<Vec<String>>,
    /// Forwarded to the CSV stage. Default `None`.
    pub nrows: Option<usize>,
    /// Leading lines to ignore. Applied when inferring colspecs and also
    /// forwarded to the CSV stage. Default `0`.
    pub skiprows: usize,
    /// Forwarded to the CSV stage. Default `None`.
    pub dtype: Option<std::collections::HashMap<String, DType>>,
    /// Forwarded to the CSV stage. Default `None`.
    pub parse_dates: Option<Vec<String>>,
    /// Forwarded to the CSV stage. Default empty.
    pub true_values: Vec<String>,
    /// Forwarded to the CSV stage. Default empty.
    pub false_values: Vec<String>,
    /// Forwarded to the CSV stage. Default `b'.'`.
    pub decimal: u8,
    /// Forwarded to the CSV stage. Default `None`.
    pub thousands: Option<u8>,
    /// Trailing lines to ignore. Applied when inferring colspecs and also
    /// forwarded to the CSV stage. Default `0`.
    pub skipfooter: usize,
}
390
391impl Default for FwfReadOptions {
392 fn default() -> Self {
393 Self {
394 colspecs: None,
395 widths: None,
396 has_headers: true,
397 na_values: Vec::new(),
398 keep_default_na: true,
399 na_filter: true,
400 index_col: None,
401 usecols: None,
402 nrows: None,
403 skiprows: 0,
404 dtype: None,
405 parse_dates: None,
406 true_values: Vec::new(),
407 false_values: Vec::new(),
408 decimal: b'.',
409 thousands: None,
410 skipfooter: 0,
411 }
412 }
413}
414
415fn infer_fwf_colspecs(
416 input: &str,
417 options: &FwfReadOptions,
418) -> Result<Vec<(usize, usize)>, IoError> {
419 let mut candidate_lines: Vec<&str> = input.lines().skip(options.skiprows).collect();
420 if options.skipfooter > 0 {
421 let retained = candidate_lines.len().saturating_sub(options.skipfooter);
422 candidate_lines.truncate(retained);
423 }
424
425 let candidate_lines: Vec<&str> = candidate_lines
426 .into_iter()
427 .filter(|line| !line.trim().is_empty())
428 .collect();
429 if candidate_lines.is_empty() {
430 return Err(IoError::Fwf(
431 "cannot infer fixed-width colspecs from empty input".to_owned(),
432 ));
433 }
434
435 let max_width = candidate_lines
436 .iter()
437 .map(|line| line.chars().count())
438 .max()
439 .unwrap_or(0);
440 let mut occupied = vec![false; max_width];
441 for line in candidate_lines {
442 for (idx, ch) in line.chars().enumerate() {
443 if !ch.is_whitespace()
444 && let Some(slot) = occupied.get_mut(idx)
445 {
446 *slot = true;
447 }
448 }
449 }
450
451 let mut specs = Vec::new();
452 let mut idx = 0usize;
453 while idx < occupied.len() {
454 while idx < occupied.len() && !occupied.get(idx).copied().unwrap_or(false) {
455 idx += 1;
456 }
457 if idx == occupied.len() {
458 break;
459 }
460 let start = idx;
461 while idx < occupied.len() && occupied.get(idx).copied().unwrap_or(false) {
462 idx += 1;
463 }
464 specs.push((start, idx));
465 }
466
467 if specs.is_empty() {
468 return Err(IoError::Fwf(
469 "cannot infer fixed-width colspecs from whitespace-only input".to_owned(),
470 ));
471 }
472 Ok(specs)
473}
474
475fn resolve_fwf_colspecs(
476 input: &str,
477 options: &FwfReadOptions,
478) -> Result<Vec<(usize, usize)>, IoError> {
479 match (&options.colspecs, &options.widths) {
480 (Some(_), Some(_)) => Err(IoError::Fwf(
481 "You must specify only one of 'widths' and 'colspecs'".to_owned(),
482 )),
483 (Some(specs), None) => {
484 for &(start, end) in specs {
485 if start > end {
486 return Err(IoError::Fwf(format!(
487 "colspecs entry ({start}, {end}) is inverted"
488 )));
489 }
490 }
491 Ok(specs.clone())
492 }
493 (None, Some(widths)) => {
494 let mut specs = Vec::with_capacity(widths.len());
495 let mut cursor = 0usize;
496 for &w in widths {
497 let next = cursor.checked_add(w).ok_or_else(|| {
498 IoError::Fwf("widths overflow when computing colspecs".to_owned())
499 })?;
500 specs.push((cursor, next));
501 cursor = next;
502 }
503 Ok(specs)
504 }
505 (None, None) => infer_fwf_colspecs(input, options),
506 }
507}
508
/// Converts fixed-width text into comma-separated, fully quoted CSV text.
///
/// Each input line (split on `'\n'`, with one trailing `'\r'` removed) yields
/// one output line that ends in `'\n'`. For every `(start, end)` in `colspecs`
/// the `char`s at positions `start..end` are taken, clamped to the line
/// length, trimmed of surrounding whitespace, wrapped in double quotes, and
/// have embedded double quotes doubled. Positions are counted in `char`s, not
/// bytes.
///
/// A spec that lies entirely past the end of a line, or whose `start` exceeds
/// its `end`, yields an empty field rather than panicking.
fn fwf_lines_to_csv(input: &str, colspecs: &[(usize, usize)]) -> String {
    let mut out = String::with_capacity(input.len());
    // Scratch buffers reused across lines and fields to avoid reallocating.
    let mut chars: Vec<char> = Vec::new();
    let mut field = String::new();

    for line in input.split_terminator('\n') {
        let line = line.strip_suffix('\r').unwrap_or(line);
        chars.clear();
        chars.extend(line.chars());

        for (position, &(start, end)) in colspecs.iter().enumerate() {
            if position > 0 {
                out.push(',');
            }

            // Checked slicing: `get` returns `None` for out-of-range or
            // inverted ranges, which we treat as an empty field.
            let stop = end.min(chars.len());
            let cells = chars.get(start..stop).unwrap_or_default();
            field.clear();
            field.extend(cells.iter());

            out.push('"');
            for ch in field.trim().chars() {
                if ch == '"' {
                    // CSV escapes a quote inside a quoted field by doubling it.
                    out.push('"');
                }
                out.push(ch);
            }
            out.push('"');
        }
        out.push('\n');
    }
    out
}
540
541fn fwf_csv_options(options: &FwfReadOptions) -> CsvReadOptions {
542 CsvReadOptions {
543 delimiter: b',',
544 has_headers: options.has_headers,
545 na_values: options.na_values.clone(),
546 keep_default_na: options.keep_default_na,
547 na_filter: options.na_filter,
548 index_col: options.index_col.clone(),
549 usecols: options.usecols.clone(),
550 nrows: options.nrows,
551 skiprows: options.skiprows,
552 dtype: options.dtype.clone(),
553 parse_dates: options.parse_dates.clone(),
554 parse_date_combinations: None,
555 parse_date_combinations_named: None,
556 comment: None,
557 true_values: options.true_values.clone(),
558 false_values: options.false_values.clone(),
559 decimal: options.decimal,
560 on_bad_lines: CsvOnBadLines::Error,
561 thousands: options.thousands,
562 quotechar: b'"',
563 escapechar: None,
564 doublequote: true,
565 skipfooter: options.skipfooter,
566 lineterminator: None,
567 }
568}
569
570pub fn read_fwf_str(input: &str, options: &FwfReadOptions) -> Result<DataFrame, IoError> {
577 let colspecs = resolve_fwf_colspecs(input, options)?;
578 let csv_input = fwf_lines_to_csv(input, &colspecs);
579 let csv_options = fwf_csv_options(options);
580 read_csv_with_options(&csv_input, &csv_options)
581}
582
583fn build_csv_object_aware_column(values: Vec<Scalar>, raw: &[String]) -> Result<Column, IoError> {
600 let column = Column::from_values(values)?;
601 if column.dtype() == DType::Utf8 && column.values().len() == raw.len() {
602 let rebuilt: Vec<Scalar> = column
603 .values()
604 .iter()
605 .zip(raw)
606 .map(|(parsed, field)| {
607 if parsed.is_missing() {
608 Scalar::Null(NullKind::Null)
609 } else {
610 Scalar::Utf8(field.clone())
611 }
612 })
613 .collect();
614 return Ok(Column::new(DType::Utf8, rebuilt)?);
615 }
616 Ok(column)
617}
618
/// Accumulator for one column in the numeric CSV fast paths.
///
/// A column starts as `Int64` and is replaced by `Float64` (existing values
/// converted with `as f64`) the first time a non-integer value is pushed.
enum CsvTypedColumnValues {
    /// Every value seen so far parsed as an `i64`.
    Int64(Vec<i64>),
    /// At least one value required a float; all values are stored as `f64`.
    Float64(Vec<f64>),
}
623
/// Data sections smaller than this many bytes (1 MiB) are parsed with a
/// single worker.
const SIMPLE_NUMERIC_CSV_PARALLEL_MIN_BYTES: usize = 1 << 20;
/// Upper bound on the number of worker threads used for parallel chunk
/// parsing and for the parallel column merge.
const SIMPLE_NUMERIC_CSV_PARALLEL_MAX_WORKERS: usize = 8;
626
/// Returns `field` without leading and trailing ASCII whitespace, as defined
/// by `u8::is_ascii_whitespace`.
///
/// A field made up only of such whitespace yields an empty slice.
fn trim_ascii_field(field: &[u8]) -> &[u8] {
    let is_content = |byte: &u8| !byte.is_ascii_whitespace();
    let first = field.iter().position(is_content);
    let last = field.iter().rposition(is_content);
    match (first, last) {
        (Some(first), Some(last)) => &field[first..=last],
        _ => &field[..0],
    }
}
644
/// Returns `true` when `field` is byte-for-byte one of the built-in
/// missing-value spellings (the empty field included).
///
/// The comparison is exact: no trimming and no case folding.
fn is_pandas_default_na_bytes(field: &[u8]) -> bool {
    matches!(
        field,
        b"" | b"#N/A"
            | b"#N/A N/A"
            | b"#NA"
            | b"-1.#IND"
            | b"-1.#QNAN"
            | b"-NaN"
            | b"-nan"
            | b"1.#IND"
            | b"1.#QNAN"
            | b"<NA>"
            | b"N/A"
            | b"NA"
            | b"NULL"
            | b"NaN"
            | b"None"
            | b"n/a"
            | b"nan"
            | b"null"
    )
}
669
/// Returns `true` when `field` and `expected` have the same length and are
/// equal byte for byte, ignoring ASCII case.
fn eq_ignore_ascii_case_bytes(field: &[u8], expected: &[u8]) -> bool {
    // The standard slice method performs the same length check followed by a
    // pairwise ASCII-case-insensitive comparison.
    field.eq_ignore_ascii_case(expected)
}
677
/// Returns `true` when `field` contains a `.`, `e` or `E` byte, i.e. a
/// character that rules out a plain integer literal.
fn has_float_marker(field: &[u8]) -> bool {
    for &byte in field {
        if byte == b'.' || byte == b'e' || byte == b'E' {
            return true;
        }
    }
    false
}
681
/// Parses `field` as an `i64` using the standard library's integer grammar.
///
/// Returns `None` for non-ASCII input and for anything `str::parse::<i64>`
/// rejects (empty input, surrounding whitespace, out-of-range values, ...).
fn parse_i64_ascii(field: &[u8]) -> Option<i64> {
    if !field.is_ascii() {
        return None;
    }
    let text = std::str::from_utf8(field).ok()?;
    text.parse().ok()
}
689
690fn parse_f64_csv_number(field: &[u8]) -> Option<f64> {
691 match fast_float2::parse::<f64, _>(field) {
692 Ok(value) => Some(value),
693 Err(_) if field.is_ascii() => std::str::from_utf8(field).ok()?.parse::<f64>().ok(),
694 Err(_) => None,
695 }
696}
697
/// Powers of ten `10^0 ..= 10^18`, indexed by exponent.
///
/// The fused numeric scanner divides a decimal mantissa by the entry at the
/// number of fractional digits it saw; the scanner accepts at most 18 digits,
/// so that index is always in range.
const FUSED_DECIMAL_POW10: [f64; 19] = [
    1e0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, 1e8, 1e9, 1e10, 1e11, 1e12, 1e13, 1e14, 1e15, 1e16,
    1e17, 1e18,
];
705
/// Result of scanning one numeric CSV field with the fused scanner.
struct FusedNumericField {
    /// The signed integer value when the field had no decimal point; `None`
    /// when it did.
    int_value: Option<i64>,
    /// The signed value as an `f64`; populated for integer and decimal fields.
    float_value: f64,
    /// Index in the scanned buffer where the field stopped: the position of
    /// the terminating `,` or `\n`, or the buffer length at end of input.
    end: usize,
}
717
718#[inline]
733fn fuse_scan_numeric_csv_field(data: &[u8], start: usize) -> Option<FusedNumericField> {
734 let mut pos = start;
735 let mut negative = false;
736 match data.get(pos) {
737 Some(b'-') => {
738 negative = true;
739 pos += 1;
740 }
741 Some(b'+') => pos += 1,
742 _ => {}
743 }
744
745 let mut mantissa: u64 = 0;
746 let mut digits = 0usize;
747 let mut frac_digits = 0usize;
748 let mut seen_dot = false;
749 loop {
750 match data.get(pos) {
751 Some(&byte @ b'0'..=b'9') => {
752 if digits == 18 {
753 return None;
754 }
755 mantissa = mantissa * 10 + u64::from(byte - b'0');
756 digits += 1;
757 frac_digits += usize::from(seen_dot);
758 pos += 1;
759 }
760 Some(b'.') if !seen_dot => {
761 seen_dot = true;
762 pos += 1;
763 }
764 Some(b',' | b'\n') | None => break,
765 Some(_) => return None,
766 }
767 }
768
769 if digits == 0 || (seen_dot && frac_digits == 0) {
770 return None;
771 }
772
773 if seen_dot {
774 if mantissa > (1u64 << 53) {
775 return None;
776 }
777 let magnitude = mantissa as f64 / FUSED_DECIMAL_POW10[frac_digits];
778 Some(FusedNumericField {
779 int_value: None,
780 float_value: if negative { -magnitude } else { magnitude },
781 end: pos,
782 })
783 } else {
784 let int_magnitude = mantissa as i64;
786 let float_magnitude = mantissa as f64;
787 Some(FusedNumericField {
788 int_value: Some(if negative {
789 -int_magnitude
790 } else {
791 int_magnitude
792 }),
793 float_value: if negative {
794 -float_magnitude
795 } else {
796 float_magnitude
797 },
798 end: pos,
799 })
800 }
801}
802
803#[inline]
808fn push_fused_numeric_csv_field(values: &mut CsvTypedColumnValues, field: &FusedNumericField) {
809 match values {
810 CsvTypedColumnValues::Int64(out) => {
811 if let Some(value) = field.int_value {
812 out.push(value);
813 } else {
814 let mut promoted = Vec::with_capacity(out.capacity());
815 promoted.extend(out.iter().copied().map(|value| value as f64));
816 promoted.push(field.float_value);
817 *values = CsvTypedColumnValues::Float64(promoted);
818 }
819 }
820 CsvTypedColumnValues::Float64(out) => out.push(field.float_value),
821 }
822}
823
824fn push_csv_default_numeric_field(values: &mut CsvTypedColumnValues, field: &[u8]) -> bool {
825 if is_pandas_default_na_bytes(field) {
826 return false;
827 }
828
829 let trimmed = trim_ascii_field(field);
830 if trimmed.is_empty()
831 || eq_ignore_ascii_case_bytes(field, b"true")
832 || eq_ignore_ascii_case_bytes(field, b"false")
833 {
834 return false;
835 }
836
837 match values {
838 CsvTypedColumnValues::Int64(out) => {
839 if !has_float_marker(trimmed)
840 && let Some(value) = parse_i64_ascii(trimmed)
841 {
842 out.push(value);
843 true
844 } else {
845 match parse_f64_csv_number(trimmed) {
846 Some(value) if !value.is_nan() => {
847 let mut promoted = Vec::with_capacity(out.capacity());
848 promoted.extend(out.iter().copied().map(|value| value as f64));
849 promoted.push(value);
850 *values = CsvTypedColumnValues::Float64(promoted);
851 true
852 }
853 Some(_) | None => false,
854 }
855 }
856 }
857 CsvTypedColumnValues::Float64(out) => match parse_f64_csv_number(trimmed) {
858 Some(value) if !value.is_nan() => {
859 out.push(value);
860 true
861 }
862 Some(_) | None => false,
863 },
864 }
865}
866
867fn csv_default_unit_range_index(row_count: i64) -> Index {
868 let row_len = usize::try_from(row_count).expect("CSV row count must be non-negative");
869 Index::new_known_unique_int64_unit_range(0, row_len)
870}
871
872fn build_typed_numeric_csv_frame(
873 headers: &[String],
874 typed_columns: Vec<CsvTypedColumnValues>,
875 row_count: i64,
876) -> Result<DataFrame, IoError> {
877 let mut out_columns = BTreeMap::new();
878 let mut column_order = Vec::with_capacity(headers.len());
879 for (name, values) in headers.iter().cloned().zip(typed_columns) {
880 let column = match values {
881 CsvTypedColumnValues::Int64(values) => Column::from_i64_values(values),
882 CsvTypedColumnValues::Float64(values) => Column::from_f64_values(values),
883 };
884 out_columns.insert(name.clone(), column);
885 column_order.push(name);
886 }
887
888 let index = csv_default_unit_range_index(row_count);
889 DataFrame::new_with_column_order(index, out_columns, column_order).map_err(IoError::from)
890}
891
892fn simple_numeric_csv_parallel_worker_count(data_len: usize) -> usize {
893 if data_len < SIMPLE_NUMERIC_CSV_PARALLEL_MIN_BYTES {
894 return 1;
895 }
896 std::thread::available_parallelism()
897 .map_or(1, std::num::NonZeroUsize::get)
898 .min(SIMPLE_NUMERIC_CSV_PARALLEL_MAX_WORKERS)
899}
900
/// Splits `data` into contiguous half-open byte ranges for parallel parsing.
///
/// Each range aims for `ceil(len / worker_count)` bytes and, unless it already
/// reaches the end of `data`, is extended through the next `\n` found at or
/// after the tentative boundary.
///
/// Returns `None` when splitting is pointless or not possible: fewer than two
/// workers, empty data, no `\n` after a tentative boundary, or a split that
/// yields only one range.
fn split_simple_numeric_csv_chunks(
    data: &[u8],
    worker_count: usize,
) -> Option<Vec<(usize, usize)>> {
    if data.is_empty() || worker_count < 2 {
        return None;
    }

    let total = data.len();
    let stride = total.div_ceil(worker_count);
    let mut spans = Vec::with_capacity(worker_count);
    let mut begin = 0usize;
    while begin < total {
        let tentative = begin.saturating_add(stride).min(total);
        let finish = if tentative < total {
            // Extend to just past the next newline.
            let offset = data[tentative..].iter().position(|&byte| byte == b'\n')?;
            tentative + offset + 1
        } else {
            tentative
        };
        if finish <= begin {
            return None;
        }
        spans.push((begin, finish));
        begin = finish;
    }

    if spans.len() > 1 {
        Some(spans)
    } else {
        None
    }
}
927
928fn parse_simple_numeric_csv_chunk(
929 data: &[u8],
930 header_count: usize,
931) -> Option<(Vec<CsvTypedColumnValues>, i64)> {
932 let row_hint = data.len() / (header_count * 8).max(1);
933 let mut typed_columns: Vec<CsvTypedColumnValues> = (0..header_count)
934 .map(|_| CsvTypedColumnValues::Int64(Vec::with_capacity(row_hint)))
935 .collect();
936 let mut row_count: i64 = 0;
937 let mut column_idx = 0usize;
938 let mut pos = 0usize;
939
940 while pos < data.len() {
941 if column_idx >= header_count {
942 return None;
943 }
944 let field_end = if let Some(field) = fuse_scan_numeric_csv_field(data, pos) {
945 push_fused_numeric_csv_field(&mut typed_columns[column_idx], &field);
946 field.end
947 } else {
948 let mut idx = pos;
952 let end = loop {
953 match data.get(idx) {
954 None | Some(b',' | b'\n') => break idx,
955 Some(b'"') => return None,
956 Some(b'\r') if data.get(idx + 1).copied() != Some(b'\n') => return None,
957 Some(_) => idx += 1,
958 }
959 };
960 if !push_csv_default_numeric_field(&mut typed_columns[column_idx], &data[pos..end]) {
961 return None;
962 }
963 end
964 };
965
966 match data.get(field_end).copied() {
967 Some(b',') => {
968 column_idx += 1;
969 if column_idx >= header_count {
970 return None;
971 }
972 }
973 Some(b'\n') | None => {
975 if column_idx + 1 != header_count {
976 return None;
977 }
978 row_count += 1;
979 column_idx = 0;
980 }
981 Some(_) => return None,
983 }
984 pos = field_end + 1;
985 }
986
987 if column_idx != 0 {
989 return None;
990 }
991
992 (row_count > 0).then_some((typed_columns, row_count))
993}
994
995fn merge_one_simple_numeric_csv_column(
1001 is_float: bool,
1002 capacity: usize,
1003 sources: Vec<CsvTypedColumnValues>,
1004) -> Option<CsvTypedColumnValues> {
1005 if is_float {
1006 let mut out = Vec::with_capacity(capacity);
1007 for src in sources {
1008 match src {
1009 CsvTypedColumnValues::Int64(src) => {
1010 out.extend(src.into_iter().map(|value| value as f64));
1011 }
1012 CsvTypedColumnValues::Float64(src) => out.extend(src),
1013 }
1014 }
1015 Some(CsvTypedColumnValues::Float64(out))
1016 } else {
1017 let mut out = Vec::with_capacity(capacity);
1018 for src in sources {
1019 match src {
1020 CsvTypedColumnValues::Int64(src) => out.extend(src),
1021 CsvTypedColumnValues::Float64(_) => return None,
1022 }
1023 }
1024 Some(CsvTypedColumnValues::Int64(out))
1025 }
1026}
1027
/// Minimum number of values (rows × columns) before the chunk merge spreads
/// columns across threads; smaller merges run on the calling thread.
const SIMPLE_NUMERIC_CSV_PARALLEL_MERGE_MIN_VALUES: usize = 1 << 16;
1031
1032fn merge_simple_numeric_csv_chunks(
1033 parsed_chunks: Vec<(Vec<CsvTypedColumnValues>, i64)>,
1034 header_count: usize,
1035) -> Option<(Vec<CsvTypedColumnValues>, i64)> {
1036 let mut final_is_float = vec![false; header_count];
1037 let mut row_count = 0i64;
1038 for (columns, rows) in &parsed_chunks {
1039 if columns.len() != header_count {
1040 return None;
1041 }
1042 row_count = row_count.checked_add(*rows)?;
1043 for (idx, column) in columns.iter().enumerate() {
1044 final_is_float[idx] |= matches!(column, CsvTypedColumnValues::Float64(_));
1045 }
1046 }
1047
1048 let capacity = usize::try_from(row_count).ok()?;
1049
1050 let mut per_column: Vec<Vec<CsvTypedColumnValues>> = (0..header_count)
1054 .map(|_| Vec::with_capacity(parsed_chunks.len()))
1055 .collect();
1056 for (columns, _) in parsed_chunks {
1057 for (slot, column) in per_column.iter_mut().zip(columns) {
1058 slot.push(column);
1059 }
1060 }
1061
1062 let parallel = header_count >= 2
1063 && capacity
1064 .checked_mul(header_count)
1065 .is_some_and(|total| total >= SIMPLE_NUMERIC_CSV_PARALLEL_MERGE_MIN_VALUES);
1066 if !parallel {
1067 let mut merged = Vec::with_capacity(header_count);
1068 for (is_float, sources) in final_is_float.iter().copied().zip(per_column) {
1069 merged.push(merge_one_simple_numeric_csv_column(
1070 is_float, capacity, sources,
1071 )?);
1072 }
1073 return Some((merged, row_count));
1074 }
1075
1076 let worker_count = header_count.min(SIMPLE_NUMERIC_CSV_PARALLEL_MAX_WORKERS);
1080 let group_size = header_count.div_ceil(worker_count);
1081 let mut groups: Vec<(usize, Vec<Vec<CsvTypedColumnValues>>)> = Vec::new();
1082 let mut group_start = 0usize;
1083 let mut remaining = per_column;
1084 while !remaining.is_empty() {
1085 let take = group_size.min(remaining.len());
1086 let rest = remaining.split_off(take);
1087 groups.push((group_start, remaining));
1088 group_start += take;
1089 remaining = rest;
1090 }
1091
1092 let final_is_float = &final_is_float;
1093 let merged_groups = std::thread::scope(|scope| {
1094 let handles: Vec<_> = groups
1095 .into_iter()
1096 .map(|(start, sources_group)| {
1097 scope.spawn(move || {
1098 let mut merged_group = Vec::with_capacity(sources_group.len());
1099 for (offset, sources) in sources_group.into_iter().enumerate() {
1100 merged_group.push(merge_one_simple_numeric_csv_column(
1101 final_is_float[start + offset],
1102 capacity,
1103 sources,
1104 )?);
1105 }
1106 Some(merged_group)
1107 })
1108 })
1109 .collect();
1110
1111 let mut merged_groups = Vec::with_capacity(handles.len());
1112 for handle in handles {
1113 merged_groups.push(handle.join().ok().flatten()?);
1114 }
1115 Some(merged_groups)
1116 })?;
1117
1118 let mut merged = Vec::with_capacity(header_count);
1119 for group in merged_groups {
1120 merged.extend(group);
1121 }
1122
1123 Some((merged, row_count))
1124}
1125
1126fn parse_simple_numeric_csv_parallel_chunks(
1127 data: &[u8],
1128 header_count: usize,
1129 worker_count: usize,
1130) -> Option<(Vec<CsvTypedColumnValues>, i64)> {
1131 let chunks = split_simple_numeric_csv_chunks(data, worker_count)?;
1132 let parsed_chunks = std::thread::scope(|scope| {
1133 let handles: Vec<_> = chunks
1134 .iter()
1135 .map(|&(start, end)| {
1136 let chunk = &data[start..end];
1137 scope.spawn(move || parse_simple_numeric_csv_chunk(chunk, header_count))
1138 })
1139 .collect();
1140
1141 let mut parsed = Vec::with_capacity(handles.len());
1142 for handle in handles {
1143 let result = handle.join().ok().flatten()?;
1144 parsed.push(result);
1145 }
1146 Some(parsed)
1147 })?;
1148
1149 merge_simple_numeric_csv_chunks(parsed_chunks, header_count)
1150}
1151
1152fn try_read_csv_str_simple_typed_numeric_parallel(
1153 data: &[u8],
1154 headers: &[String],
1155 header_count: usize,
1156) -> Result<Option<DataFrame>, IoError> {
1157 let worker_count = simple_numeric_csv_parallel_worker_count(data.len());
1158 let Some((typed_columns, row_count)) =
1159 parse_simple_numeric_csv_parallel_chunks(data, header_count, worker_count)
1160 else {
1161 return Ok(None);
1162 };
1163
1164 build_typed_numeric_csv_frame(headers, typed_columns, row_count).map(Some)
1165}
1166
1167fn try_read_csv_str_simple_typed_numeric(
1168 input: &str,
1169 headers: &[String],
1170) -> Result<Option<DataFrame>, IoError> {
1171 let header_count = headers.len();
1172 if header_count == 0 {
1173 return Ok(None);
1174 }
1175
1176 let bytes = input.as_bytes();
1177 let Some(header_end) = bytes.iter().position(|byte| *byte == b'\n') else {
1178 return Ok(None);
1179 };
1180 if bytes[..header_end]
1181 .iter()
1182 .any(|byte| matches!(byte, b'"' | b'\r'))
1183 {
1184 return Ok(None);
1185 }
1186
1187 let data = &bytes[header_end + 1..];
1188 if data.is_empty() {
1189 return Ok(None);
1190 }
1191
1192 if let Some(frame) =
1193 try_read_csv_str_simple_typed_numeric_parallel(data, headers, header_count)?
1194 {
1195 return Ok(Some(frame));
1196 }
1197
1198 let Some((typed_columns, row_count)) = parse_simple_numeric_csv_chunk(data, header_count)
1199 else {
1200 return Ok(None);
1201 };
1202
1203 build_typed_numeric_csv_frame(headers, typed_columns, row_count).map(Some)
1204}
1205
1206fn try_read_csv_str_typed_numeric(
1207 input: &str,
1208 headers: &[String],
1209) -> Result<Option<DataFrame>, IoError> {
1210 let header_count = headers.len();
1211 if header_count == 0 {
1212 return Ok(None);
1213 }
1214
1215 let mut reader = ReaderBuilder::new()
1216 .has_headers(true)
1217 .from_reader(input.as_bytes());
1218 let _ = reader.headers().map_err(IoError::from)?;
1219
1220 let row_hint = input.len() / (header_count * 8).max(1);
1221 let mut typed_columns: Vec<CsvTypedColumnValues> = (0..header_count)
1222 .map(|_| CsvTypedColumnValues::Int64(Vec::with_capacity(row_hint)))
1223 .collect();
1224 let mut row_count: i64 = 0;
1225 for row in reader.byte_records() {
1226 let record = row?;
1227 for (idx, column) in typed_columns.iter_mut().enumerate() {
1228 let field = record.get(idx).unwrap_or_default();
1229 if !push_csv_default_numeric_field(column, field) {
1230 return Ok(None);
1231 }
1232 }
1233 row_count += 1;
1234 }
1235
1236 if row_count == 0 {
1237 return Ok(None);
1238 }
1239
1240 build_typed_numeric_csv_frame(headers, typed_columns, row_count).map(Some)
1241}
1242
1243fn try_read_csv_with_options_no_na_numeric_fast_path(
1244 input: &str,
1245) -> Result<Option<DataFrame>, IoError> {
1246 let mut reader = ReaderBuilder::new()
1247 .has_headers(true)
1248 .from_reader(input.as_bytes());
1249
1250 let headers_record = reader.headers().cloned().map_err(IoError::from)?;
1251 if headers_record.is_empty() {
1252 return Err(IoError::MissingHeaders);
1253 }
1254 let headers: Vec<String> = headers_record.iter().map(ToOwned::to_owned).collect();
1255 reject_duplicate_headers(&headers)?;
1256
1257 try_read_csv_str_simple_typed_numeric(input, &headers)
1258}
1259
/// Maximum number of parsed frames the CSV parse cache retains; older entries
/// are evicted from the back.
const CSV_PARSE_CACHE_MAX_ENTRIES: usize = 2;
/// Inputs longer than this many bytes (32 MiB) are neither looked up in nor
/// stored into the CSV parse cache.
const CSV_PARSE_CACHE_MAX_INPUT_BYTES: usize = 32 * 1024 * 1024;
1262
/// Identifies which parsing configuration produced a cached frame, so that
/// identical input parsed under different configurations is cached separately.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
enum CsvParseCacheMode {
    /// Frame produced by `read_csv_str`.
    Default,
    /// NOTE(review): presumably frames from the NA-filter-off numeric fast
    /// path; its use is outside this section — confirm.
    NoNaNumeric,
}
1268
/// One slot of the CSV parse cache: the exact input text, the mode it was
/// parsed under, and the resulting frame.
#[derive(Clone)]
struct CsvParseCacheEntry {
    /// Parsing configuration the frame was produced with.
    mode: CsvParseCacheMode,
    /// Full input text; lookups compare against it byte for byte.
    input: Arc<str>,
    /// The parsed frame. Shared so a lookup only clones the `Arc` while the
    /// lock is held; the frame itself is cloned after the lock is released.
    frame: Arc<DataFrame>,
}
1279
/// Process-wide cache of recently parsed CSV inputs, most recently used entry
/// at the front. Initialised lazily by `csv_parse_cache`.
static CSV_PARSE_CACHE: OnceLock<Mutex<VecDeque<CsvParseCacheEntry>>> = OnceLock::new();
1281
1282fn csv_parse_cache() -> &'static Mutex<VecDeque<CsvParseCacheEntry>> {
1283 CSV_PARSE_CACHE.get_or_init(|| Mutex::new(VecDeque::new()))
1284}
1285
1286fn csv_parse_cache_entry_matches(
1287 entry: &CsvParseCacheEntry,
1288 mode: CsvParseCacheMode,
1289 input: &str,
1290) -> bool {
1291 entry.mode == mode
1292 && entry.input.len() == input.len()
1293 && entry.input.as_bytes() == input.as_bytes()
1294}
1295
1296fn csv_parse_cache_lookup(mode: CsvParseCacheMode, input: &str) -> Option<DataFrame> {
1297 if input.len() > CSV_PARSE_CACHE_MAX_INPUT_BYTES {
1298 return None;
1299 }
1300
1301 let shared: Arc<DataFrame> = {
1304 let mut cache = csv_parse_cache().lock().ok()?;
1305 let pos = cache
1306 .iter()
1307 .position(|entry| csv_parse_cache_entry_matches(entry, mode, input))?;
1308 if pos == 0 {
1309 Arc::clone(&cache.front()?.frame)
1310 } else {
1311 let entry = cache.remove(pos)?;
1312 let frame = Arc::clone(&entry.frame);
1313 cache.push_front(entry);
1314 frame
1315 }
1316 };
1317 Some((*shared).clone())
1318}
1319
1320fn csv_parse_cache_store(mode: CsvParseCacheMode, input: &str, frame: &DataFrame) {
1321 if input.len() > CSV_PARSE_CACHE_MAX_INPUT_BYTES {
1322 return;
1323 }
1324
1325 let shared = Arc::new(frame.clone());
1327 let owned_input = Arc::<str>::from(input);
1328
1329 let Ok(mut cache) = csv_parse_cache().lock() else {
1330 return;
1331 };
1332
1333 if let Some(pos) = cache
1334 .iter()
1335 .position(|entry| csv_parse_cache_entry_matches(entry, mode, input))
1336 {
1337 cache.remove(pos);
1338 }
1339
1340 cache.push_front(CsvParseCacheEntry {
1341 mode,
1342 input: owned_input,
1343 frame: shared,
1344 });
1345
1346 while cache.len() > CSV_PARSE_CACHE_MAX_ENTRIES {
1347 cache.pop_back();
1348 }
1349}
1350
1351fn read_csv_str_uncached(input: &str) -> Result<DataFrame, IoError> {
1352 let mut reader = ReaderBuilder::new()
1353 .has_headers(true)
1354 .from_reader(input.as_bytes());
1355
1356 let headers_record = reader.headers().cloned().map_err(IoError::from)?;
1357
1358 if headers_record.is_empty() {
1359 return Err(IoError::MissingHeaders);
1360 }
1361 let headers: Vec<String> = headers_record.iter().map(ToOwned::to_owned).collect();
1362 reject_duplicate_headers(&headers)?;
1363
1364 if let Some(frame) = try_read_csv_str_simple_typed_numeric(input, &headers)? {
1365 return Ok(frame);
1366 }
1367
1368 if let Some(frame) = try_read_csv_str_typed_numeric(input, &headers)? {
1369 return Ok(frame);
1370 }
1371
1372 let header_count = headers.len();
1375 let row_hint = input.len() / (header_count * 8).max(1);
1376 let mut columns: Vec<Vec<Scalar>> = (0..header_count)
1377 .map(|_| Vec::with_capacity(row_hint))
1378 .collect();
1379 let mut raw_columns: Vec<Vec<String>> = (0..header_count)
1382 .map(|_| Vec::with_capacity(row_hint))
1383 .collect();
1384
1385 let mut row_count: i64 = 0;
1386 for row in reader.records() {
1387 let record = row?;
1388 for idx in 0..header_count {
1389 let field = record.get(idx).unwrap_or_default();
1390 columns[idx].push(parse_scalar(field));
1391 raw_columns[idx].push(field.to_owned());
1392 }
1393 row_count += 1;
1394 }
1395
1396 let mut out_columns = BTreeMap::new();
1397 let mut column_order = Vec::with_capacity(header_count);
1398 for (idx, values) in columns.into_iter().enumerate() {
1399 let name = headers.get(idx).cloned().unwrap_or_default();
1400 let column = build_csv_object_aware_column(values, &raw_columns[idx])?;
1401 out_columns.insert(name.clone(), column);
1402 column_order.push(name);
1403 }
1404
1405 let index = csv_default_unit_range_index(row_count);
1406 Ok(DataFrame::new_with_column_order(
1407 index,
1408 out_columns,
1409 column_order,
1410 )?)
1411}
1412
1413pub fn read_csv_str(input: &str) -> Result<DataFrame, IoError> {
1414 if let Some(frame) = csv_parse_cache_lookup(CsvParseCacheMode::Default, input) {
1415 return Ok(frame);
1416 }
1417
1418 let frame = read_csv_str_uncached(input)?;
1419 csv_parse_cache_store(CsvParseCacheMode::Default, input, &frame);
1420 Ok(frame)
1421}
1422
1423pub fn write_csv_string(frame: &DataFrame) -> Result<String, IoError> {
1424 write_csv_string_with_options(frame, &CsvWriteOptions::default())
1425}
1426
1427pub fn write_markdown_string(frame: &DataFrame) -> Result<String, IoError> {
1428 write_markdown_string_with_options(frame, &MarkdownWriteOptions::default())
1429}
1430
1431pub fn write_latex_string(frame: &DataFrame) -> Result<String, IoError> {
1432 write_latex_string_with_options(frame, &LatexWriteOptions::default())
1433}
1434
1435pub fn write_html_string(frame: &DataFrame) -> Result<String, IoError> {
1436 write_html_string_with_options(frame, &HtmlWriteOptions::default())
1437}
1438
1439pub fn write_xml_string(frame: &DataFrame) -> Result<String, IoError> {
1440 write_xml_string_with_options(frame, &XmlWriteOptions::default())
1441}
1442
1443pub fn write_pickle_bytes(frame: &DataFrame) -> Result<Vec<u8>, IoError> {
1444 write_pickle_bytes_with_options(frame, &PickleWriteOptions::default())
1445}
1446
1447pub fn write_stata_bytes(frame: &DataFrame) -> Result<Vec<u8>, IoError> {
1448 write_stata_bytes_with_options(frame, &StataWriteOptions::default())
1449}
1450
/// Options accepted by `write_csv_string_with_options`.
#[derive(Debug, Clone)]
pub struct CsvWriteOptions {
    /// Field delimiter byte handed to the CSV writer.
    pub delimiter: u8,
    /// Text written for cells that have no value.
    pub na_rep: String,
    /// Whether a header row is written before the data rows.
    pub header: bool,
    /// Whether each row starts with its index label.
    pub include_index: bool,
    /// Optional label for the index column; presumably overrides the index's
    /// own name in the header row (resolution happens in a helper).
    pub index_label: Option<String>,
}

impl Default for CsvWriteOptions {
    /// Comma-delimited, empty NA text, header row on, index column off.
    fn default() -> Self {
        CsvWriteOptions {
            index_label: None,
            include_index: false,
            header: true,
            na_rep: String::default(),
            delimiter: b',',
        }
    }
}
1482
/// Options accepted by `write_markdown_string_with_options`.
#[derive(Debug, Clone)]
pub struct MarkdownWriteOptions {
    /// Whether each row starts with its index label.
    pub include_index: bool,
    /// Text written for cells that have no value.
    pub na_rep: String,
    /// Optional label for the index column, forwarded to the index-header
    /// resolver when the index is included.
    pub index_label: Option<String>,
}

impl Default for MarkdownWriteOptions {
    /// Index column on, `"NaN"` for missing cells, no explicit index label.
    fn default() -> Self {
        MarkdownWriteOptions {
            index_label: None,
            na_rep: String::from("NaN"),
            include_index: true,
        }
    }
}
1505
/// Options accepted by `write_latex_string_with_options`.
#[derive(Debug, Clone)]
pub struct LatexWriteOptions {
    /// Whether each row starts with its index label.
    pub include_index: bool,
    /// Text written for cells that have no value.
    pub na_rep: String,
    /// Optional label for the index column, forwarded to the index-header
    /// resolver when the index is included.
    pub index_label: Option<String>,
    /// Forwarded to the row writer; presumably enables escaping of LaTeX
    /// special characters in cell text.
    pub escape: bool,
}

impl Default for LatexWriteOptions {
    /// Index column on, `"NaN"` for missing cells, no index label, escaping off.
    fn default() -> Self {
        LatexWriteOptions {
            escape: false,
            index_label: None,
            na_rep: String::from("NaN"),
            include_index: true,
        }
    }
}
1531
/// Options accepted by `write_html_string_with_options`.
#[derive(Debug, Clone)]
pub struct HtmlWriteOptions {
    /// Whether each row starts with an index `<th>` cell.
    pub include_index: bool,
    /// Text written for cells that have no value.
    pub na_rep: String,
    /// Extra CSS classes appended after the always-present `dataframe` class.
    pub classes: Vec<String>,
    /// Optional `id` attribute; ignored when blank after trimming.
    pub table_id: Option<String>,
    /// Optional `border` attribute; emitted only when greater than zero.
    pub border: Option<u32>,
    /// `text-align` value for the header row; `"right"` when unset.
    pub justify: Option<String>,
    /// Whether cell and header text is HTML-escaped.
    pub escape: bool,
    /// Whether `http://`, `https://`, and `ftp://` strings become anchors.
    pub render_links: bool,
}

impl Default for HtmlWriteOptions {
    /// Index on, `"NaN"` for missing cells, border 1, escaping on, links off.
    fn default() -> Self {
        HtmlWriteOptions {
            render_links: false,
            escape: true,
            justify: None,
            border: Some(1),
            table_id: None,
            classes: Vec::default(),
            na_rep: String::from("NaN"),
            include_index: true,
        }
    }
}
1571
/// Options accepted by `read_html_str_with_options`.
#[derive(Debug, Clone, Default)]
pub struct HtmlReadOptions {
    /// Zero-based position of the `<table>` element to read, counted in
    /// document order. The derived default is `0`, i.e. the first table.
    pub table_index: usize,
}
1582
/// Pickle protocol selector used when writing.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum PickleProtocol {
    /// Serialize with `serde_pickle::SerOptions::proto_v2()`.
    V2,
    /// Serialize with the unmodified `serde_pickle::SerOptions::new()`
    /// (presumably protocol 3 — confirm against serde_pickle's defaults).
    V3,
}
1591
1592#[derive(Debug, Clone, Copy, PartialEq, Eq)]
1597pub struct PickleWriteOptions {
1598 pub protocol: PickleProtocol,
1600}
1601
1602impl Default for PickleWriteOptions {
1603 fn default() -> Self {
1604 Self {
1605 protocol: PickleProtocol::V3,
1606 }
1607 }
1608}
1609
/// Options accepted by `read_pickle_bytes_with_options`.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub struct PickleReadOptions {
    /// When `true`, the deserializer is configured with
    /// `serde_pickle::DeOptions::decode_strings()`. The derived default is
    /// `false`.
    pub decode_legacy_strings: bool,
}
1616
/// Key used by `HdfReadOptions::default()` and `HdfWriteOptions::default()`.
pub const DEFAULT_HDF5_KEY: &str = "frame";

// NOTE(review): judging by the name, this is the dataset that stores the
// pickled DataFrame payload inside an HDF5 file — confirm at the use site.
#[cfg(feature = "hdf5")]
const HDF5_PAYLOAD_DATASET: &str = "__frankenpandas_dataframe_pickle_v1";
1622
1623#[derive(Debug, Clone, PartialEq, Eq)]
1630pub struct HdfReadOptions {
1631 pub key: String,
1633}
1634
1635impl Default for HdfReadOptions {
1636 fn default() -> Self {
1637 Self {
1638 key: DEFAULT_HDF5_KEY.to_owned(),
1639 }
1640 }
1641}
1642
1643#[derive(Debug, Clone, PartialEq, Eq)]
1645pub struct HdfWriteOptions {
1646 pub key: String,
1648}
1649
1650impl Default for HdfWriteOptions {
1651 fn default() -> Self {
1652 Self {
1653 key: DEFAULT_HDF5_KEY.to_owned(),
1654 }
1655 }
1656}
1657
/// Options accepted by `write_stata_bytes_with_options`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct StataWriteOptions {
    /// Whether the index is written as a leading string variable.
    pub include_index: bool,
    /// Variable name for the index; `"index"` is used when unset.
    pub index_label: Option<String>,
}

impl Default for StataWriteOptions {
    /// Index variable on, with no explicit name.
    fn default() -> Self {
        StataWriteOptions {
            index_label: None,
            include_index: true,
        }
    }
}
1675
/// Options accepted by `write_xml_string_with_options`.
///
/// NOTE(review): the field descriptions below are inferred from the names;
/// the writer that consumes them is outside this excerpt.
#[derive(Debug, Clone)]
pub struct XmlWriteOptions {
    /// Presumably whether the index is written for each row.
    pub include_index: bool,
    /// Presumably the name of the document's root element.
    pub root_name: String,
    /// Presumably the name of each per-row element.
    pub row_name: String,
    /// Presumably an optional name for the index element.
    pub index_label: Option<String>,
}

impl Default for XmlWriteOptions {
    /// Index on, root `"data"`, row `"row"`, no explicit index label.
    fn default() -> Self {
        XmlWriteOptions {
            index_label: None,
            row_name: String::from("row"),
            root_name: String::from("data"),
            include_index: true,
        }
    }
}
1702
/// Options for reading a frame from XML.
#[derive(Debug, Clone)]
pub struct XmlReadOptions {
    /// Presumably the element name that marks one row — confirm at the reader,
    /// which is outside this excerpt.
    pub row_name: String,
}

impl Default for XmlReadOptions {
    /// Uses `"row"` as the row element name.
    fn default() -> Self {
        XmlReadOptions {
            row_name: String::from("row"),
        }
    }
}
1719
1720pub fn write_csv_string_with_options(
1727 frame: &DataFrame,
1728 options: &CsvWriteOptions,
1729) -> Result<String, IoError> {
1730 if options.include_index && frame.row_multiindex().is_some() {
1731 let materialized = materialize_named_row_multiindex_columns(frame)?;
1732 let mut nested_options = options.clone();
1733 nested_options.include_index = false;
1734 nested_options.index_label = None;
1735 return write_csv_string_with_options(&materialized, &nested_options);
1736 }
1737
1738 let mut writer = WriterBuilder::new()
1739 .delimiter(options.delimiter)
1740 .from_writer(Vec::new());
1741
1742 let headers = frame
1743 .column_names()
1744 .into_iter()
1745 .cloned()
1746 .collect::<Vec<_>>();
1747 if options.header {
1748 let mut header_row =
1749 Vec::with_capacity(headers.len() + if options.include_index { 1 } else { 0 });
1750 if options.include_index {
1751 header_row.push(resolve_csv_index_header(frame, options));
1752 }
1753 header_row.extend(headers.iter().cloned());
1754 writer.write_record(&header_row)?;
1755 }
1756
1757 let dt_formats: Vec<Option<DatetimeCsvFormat>> = headers
1760 .iter()
1761 .map(|name| {
1762 frame.column(name).and_then(|column| {
1763 (column.dtype() == DType::Datetime64).then(|| datetime_csv_format(column))
1764 })
1765 })
1766 .collect();
1767 let index_dt_format = if options.include_index {
1769 index_datetime_csv_format(frame)
1770 } else {
1771 None
1772 };
1773
1774 for row_idx in 0..frame.index().len() {
1775 let mut row = Vec::with_capacity(headers.len() + if options.include_index { 1 } else { 0 });
1776 if options.include_index {
1777 row.push(index_label_csv_string(frame, row_idx, index_dt_format)?);
1778 }
1779 row.extend(headers.iter().enumerate().map(|(col_idx, name)| {
1780 let value = frame.column(name).and_then(|column| column.value(row_idx));
1781 match value {
1782 Some(scalar) => scalar_to_csv_cell(scalar, &options.na_rep, dt_formats[col_idx]),
1783 None => options.na_rep.clone(),
1784 }
1785 }));
1786 writer.write_record(&row)?;
1787 }
1788
1789 let bytes = writer.into_inner().map_err(|err| err.into_error())?;
1790 Ok(String::from_utf8(bytes)?)
1791}
1792
1793pub fn write_markdown_string_with_options(
1798 frame: &DataFrame,
1799 options: &MarkdownWriteOptions,
1800) -> Result<String, IoError> {
1801 if options.include_index && frame.row_multiindex().is_some() {
1802 let materialized = materialize_named_row_multiindex_columns(frame)?;
1803 let mut nested_options = options.clone();
1804 nested_options.include_index = false;
1805 nested_options.index_label = None;
1806 return write_markdown_string_with_options(&materialized, &nested_options);
1807 }
1808
1809 let headers = frame
1810 .column_names()
1811 .into_iter()
1812 .cloned()
1813 .collect::<Vec<_>>();
1814 let table_width = headers.len() + usize::from(options.include_index);
1815 let mut out = String::new();
1816
1817 let mut header_row = Vec::with_capacity(table_width);
1818 if options.include_index {
1819 header_row.push(resolve_table_index_header(
1820 frame,
1821 options.index_label.as_deref(),
1822 ));
1823 }
1824 header_row.extend(headers.iter().cloned());
1825 push_markdown_row(&mut out, &header_row);
1826
1827 let separator = vec!["---".to_owned(); table_width];
1828 push_markdown_row(&mut out, &separator);
1829
1830 for row_idx in 0..frame.index().len() {
1831 let mut row = Vec::with_capacity(table_width);
1832 if options.include_index {
1833 row.push(index_label_string(frame, row_idx)?);
1834 }
1835 row.extend(headers.iter().map(|name| {
1836 let value = frame.column(name).and_then(|column| column.value(row_idx));
1837 match value {
1838 Some(scalar) => scalar_to_table_with_na(scalar, &options.na_rep),
1839 None => options.na_rep.clone(),
1840 }
1841 }));
1842 push_markdown_row(&mut out, &row);
1843 }
1844
1845 Ok(out)
1846}
1847
1848pub fn write_latex_string_with_options(
1850 frame: &DataFrame,
1851 options: &LatexWriteOptions,
1852) -> Result<String, IoError> {
1853 if options.include_index && frame.row_multiindex().is_some() {
1854 let materialized = materialize_named_row_multiindex_columns(frame)?;
1855 let mut nested_options = options.clone();
1856 nested_options.include_index = false;
1857 nested_options.index_label = None;
1858 return write_latex_string_with_options(&materialized, &nested_options);
1859 }
1860
1861 let headers = frame
1862 .column_names()
1863 .into_iter()
1864 .cloned()
1865 .collect::<Vec<_>>();
1866 let table_width = headers.len() + usize::from(options.include_index);
1867 let mut out = String::new();
1868
1869 out.push_str("\\begin{tabular}{");
1870 out.push_str(&"l".repeat(table_width));
1871 out.push_str("}\n\\toprule\n");
1872
1873 let mut header_row = Vec::with_capacity(table_width);
1874 if options.include_index {
1875 header_row.push(String::new());
1876 }
1877 header_row.extend(headers.iter().cloned());
1878 push_latex_row(&mut out, &header_row, options.escape);
1879
1880 if options.include_index {
1881 let index_name = resolve_table_index_header(frame, options.index_label.as_deref());
1882 if !index_name.is_empty() {
1883 let mut index_name_row = Vec::with_capacity(table_width);
1884 index_name_row.push(index_name);
1885 index_name_row.extend(std::iter::repeat_n(String::new(), headers.len()));
1886 push_latex_row(&mut out, &index_name_row, options.escape);
1887 }
1888 }
1889
1890 out.push_str("\\midrule\n");
1891
1892 for row_idx in 0..frame.index().len() {
1893 let mut row = Vec::with_capacity(table_width);
1894 if options.include_index {
1895 row.push(index_label_string(frame, row_idx)?);
1896 }
1897 row.extend(headers.iter().map(|name| {
1898 let value = frame.column(name).and_then(|column| column.value(row_idx));
1899 match value {
1900 Some(scalar) => scalar_to_latex_cell(scalar, &options.na_rep),
1901 None => options.na_rep.clone(),
1902 }
1903 }));
1904 push_latex_row(&mut out, &row, options.escape);
1905 }
1906
1907 out.push_str("\\bottomrule\n\\end{tabular}\n");
1908 Ok(out)
1909}
1910
1911pub fn write_html_string_with_options(
1913 frame: &DataFrame,
1914 options: &HtmlWriteOptions,
1915) -> Result<String, IoError> {
1916 if options.include_index && frame.row_multiindex().is_some() {
1917 let materialized = materialize_named_row_multiindex_columns(frame)?;
1918 let nested_options = HtmlWriteOptions {
1919 include_index: false,
1920 ..options.clone()
1921 };
1922 return write_html_string_with_options(&materialized, &nested_options);
1923 }
1924
1925 write_html_table_string(frame, options)
1926}
1927
1928fn write_html_table_string(
1929 frame: &DataFrame,
1930 options: &HtmlWriteOptions,
1931) -> Result<String, IoError> {
1932 let mut out = String::new();
1933 push_html_table_open(&mut out, options);
1934 out.push_str(" <thead>\n <tr style=\"text-align: ");
1935 out.push_str(&escape_html_attr(
1936 options.justify.as_deref().unwrap_or("right"),
1937 ));
1938 out.push_str(";\">\n");
1939
1940 if options.include_index {
1941 out.push_str(" <th></th>\n");
1942 }
1943 for name in frame.column_names() {
1944 out.push_str(" <th>");
1945 out.push_str(&html_text(name, options.escape));
1946 out.push_str("</th>\n");
1947 }
1948 out.push_str(" </tr>\n </thead>\n <tbody>\n");
1949
1950 for row_idx in 0..frame.index().len() {
1951 out.push_str(" <tr>\n");
1952 if options.include_index {
1953 out.push_str(" <th>");
1954 out.push_str(&html_index_label_string(frame, row_idx, options.escape)?);
1955 out.push_str("</th>\n");
1956 }
1957 for name in frame.column_names() {
1958 let value = frame.column(name).and_then(|column| column.value(row_idx));
1959 out.push_str(" <td>");
1960 match value {
1961 Some(scalar) => out.push_str(&html_scalar_string(scalar, options)),
1962 None => out.push_str(&html_text(&options.na_rep, options.escape)),
1963 }
1964 out.push_str("</td>\n");
1965 }
1966 out.push_str(" </tr>\n");
1967 }
1968
1969 out.push_str(" </tbody>\n</table>");
1970 Ok(out)
1971}
1972
1973fn push_html_table_open(out: &mut String, options: &HtmlWriteOptions) {
1974 out.push_str("<table");
1975 if let Some(border) = options.border.filter(|border| *border > 0) {
1976 out.push_str(" border=\"");
1977 out.push_str(&border.to_string());
1978 out.push('"');
1979 }
1980 out.push_str(" class=\"");
1981 out.push_str(&html_class_attr(&options.classes));
1982 out.push('"');
1983 if let Some(table_id) = options
1984 .table_id
1985 .as_deref()
1986 .map(str::trim)
1987 .filter(|table_id| !table_id.is_empty())
1988 {
1989 out.push_str(" id=\"");
1990 out.push_str(&escape_html_attr(table_id));
1991 out.push('"');
1992 }
1993 out.push_str(">\n");
1994}
1995
1996fn html_class_attr(classes: &[String]) -> String {
1997 std::iter::once("dataframe".to_owned())
1998 .chain(
1999 classes
2000 .iter()
2001 .flat_map(|class| class.split_whitespace())
2002 .filter(|class| !class.is_empty())
2003 .map(escape_html_attr),
2004 )
2005 .collect::<Vec<_>>()
2006 .join(" ")
2007}
2008
2009fn html_index_label_string(
2010 frame: &DataFrame,
2011 row_idx: usize,
2012 escape: bool,
2013) -> Result<String, IoError> {
2014 let label = frame
2015 .index()
2016 .labels()
2017 .get(row_idx)
2018 .ok_or_else(|| IoError::Html(format!("missing index label at row {row_idx}")))?;
2019 let raw = match label {
2020 IndexLabel::Int64(v) => v.to_string(),
2021 IndexLabel::Utf8(s) => s.clone(),
2022 IndexLabel::Timedelta64(ns) => Timedelta::format(*ns),
2023 IndexLabel::Datetime64(ns) => format_datetime_ns(*ns),
2024 IndexLabel::Null(_) => label.to_string(),
2025 };
2026 Ok(html_text(&raw, escape))
2027}
2028
2029fn html_scalar_string(scalar: &Scalar, options: &HtmlWriteOptions) -> String {
2030 match scalar {
2031 Scalar::Null(_) => html_text(&options.na_rep, options.escape),
2032 Scalar::Bool(value) => html_text(if *value { "True" } else { "False" }, options.escape),
2033 Scalar::Int64(value) => value.to_string(),
2034 Scalar::Float64(value) => {
2035 if value.is_nan() {
2036 html_text(&options.na_rep, options.escape)
2037 } else if value.fract() == 0.0 {
2038 format!("{value:.1}")
2039 } else {
2040 value.to_string()
2041 }
2042 }
2043 Scalar::Utf8(value) => {
2044 if options.render_links && is_html_renderable_link(value) {
2045 let label = html_text(value, options.escape);
2046 format!(
2047 "<a href=\"{}\" target=\"_blank\">{label}</a>",
2048 escape_html_attr(value)
2049 )
2050 } else {
2051 html_text(value, options.escape)
2052 }
2053 }
2054 Scalar::Timedelta64(value) => {
2055 if *value == Timedelta::NAT {
2056 html_text(&options.na_rep, options.escape)
2057 } else {
2058 html_text(&Timedelta::format(*value), options.escape)
2059 }
2060 }
2061 Scalar::Datetime64(value) => {
2062 if *value == Timestamp::NAT {
2063 html_text(&options.na_rep, options.escape)
2064 } else {
2065 html_text(&format_datetime_ns(*value), options.escape)
2066 }
2067 }
2068 Scalar::Period(value) => {
2069 if *value == i64::MIN {
2070 html_text(&options.na_rep, options.escape)
2071 } else {
2072 html_text(&format!("Period[{value}]"), options.escape)
2073 }
2074 }
2075 Scalar::Interval(iv) => html_text(&format!("{iv}"), options.escape),
2076 }
2077}
2078
2079fn html_text(value: &str, escape: bool) -> String {
2080 if escape {
2081 escape_html_text(value)
2082 } else {
2083 value.to_owned()
2084 }
2085}
2086
/// Reports whether `value` starts with one of the link schemes that the HTML
/// writer turns into anchors: `http://`, `https://`, or `ftp://`.
///
/// The check is a case-sensitive prefix match with no trimming.
fn is_html_renderable_link(value: &str) -> bool {
    const LINK_SCHEMES: [&str; 3] = ["http://", "https://", "ftp://"];
    LINK_SCHEMES
        .iter()
        .any(|scheme| value.starts_with(*scheme))
}
2090
/// Escapes `value` for use as HTML text content.
///
/// `&`, `<`, and `>` are replaced with `&amp;`, `&lt;`, and `&gt;`; every
/// other character, including quotes, is copied through unchanged.
fn escape_html_text(value: &str) -> String {
    // The escaped form is at least as long as the input.
    let mut escaped = String::with_capacity(value.len());
    for ch in value.chars() {
        match ch {
            '&' => escaped.push_str("&amp;"),
            '<' => escaped.push_str("&lt;"),
            '>' => escaped.push_str("&gt;"),
            _ => escaped.push(ch),
        }
    }
    escaped
}
2103
/// Escapes `value` for use inside a double-quoted HTML attribute.
///
/// `&`, `"`, `<`, and `>` are replaced with `&amp;`, `&quot;`, `&lt;`, and
/// `&gt;`; every other character is copied through unchanged. Single quotes
/// are left alone, so the result is only safe in double-quoted attributes.
fn escape_html_attr(value: &str) -> String {
    // The escaped form is at least as long as the input.
    let mut escaped = String::with_capacity(value.len());
    for ch in value.chars() {
        match ch {
            '&' => escaped.push_str("&amp;"),
            '"' => escaped.push_str("&quot;"),
            '<' => escaped.push_str("&lt;"),
            '>' => escaped.push_str("&gt;"),
            _ => escaped.push(ch),
        }
    }
    escaped
}
2117
2118pub fn read_html_str(input: &str) -> Result<DataFrame, IoError> {
2125 read_html_str_with_options(input, &HtmlReadOptions::default())
2126}
2127
2128pub fn read_html_str_with_options(
2130 input: &str,
2131 options: &HtmlReadOptions,
2132) -> Result<DataFrame, IoError> {
2133 let document = Html::parse_document(input);
2134 let table_selector = html_selector("table")?;
2135 let row_selector = html_selector("tr")?;
2136 let thead_row_selector = html_selector("thead tr")?;
2137 let tbody_row_selector = html_selector("tbody tr")?;
2138 let cell_selector = html_selector("th, td")?;
2139 let th_selector = html_selector("th")?;
2140
2141 let table = document
2142 .select(&table_selector)
2143 .nth(options.table_index)
2144 .ok_or_else(|| {
2145 IoError::Html(format!(
2146 "html input contains no table at index {}",
2147 options.table_index
2148 ))
2149 })?;
2150
2151 let header_rows = table
2152 .select(&thead_row_selector)
2153 .map(|row| html_row_cells(row, &cell_selector))
2154 .filter(|cells| !cells.is_empty())
2155 .collect::<Vec<_>>();
2156 let body_rows = table
2157 .select(&tbody_row_selector)
2158 .map(|row| html_row_cells(row, &cell_selector))
2159 .filter(|cells| !cells.is_empty())
2160 .collect::<Vec<_>>();
2161
2162 if let Some(header_cells) = header_rows.first() {
2163 let headers = normalize_html_headers(header_cells)?;
2164 return html_rows_to_frame(headers, body_rows);
2165 }
2166
2167 let all_rows = table
2168 .select(&row_selector)
2169 .map(|row| {
2170 let has_header_cell = row.select(&th_selector).next().is_some();
2171 (has_header_cell, html_row_cells(row, &cell_selector))
2172 })
2173 .filter(|(_, cells)| !cells.is_empty())
2174 .collect::<Vec<_>>();
2175 if all_rows.is_empty() {
2176 return Err(IoError::Html(
2177 "html table contains no rows with cells".to_owned(),
2178 ));
2179 }
2180
2181 let mut all_rows = all_rows.into_iter();
2182 let (first_has_header, first_cells) = all_rows
2183 .next()
2184 .ok_or_else(|| IoError::Html("html table contains no rows with cells".to_owned()))?;
2185
2186 if first_has_header {
2187 let headers = normalize_html_headers(&first_cells)?;
2188 let data_rows = all_rows.map(|(_, cells)| cells).collect::<Vec<_>>();
2189 html_rows_to_frame(headers, data_rows)
2190 } else {
2191 let mut data_rows = vec![first_cells];
2192 data_rows.extend(all_rows.map(|(_, cells)| cells));
2193 let width = data_rows.iter().map(Vec::len).max().unwrap_or(0);
2194 if width == 0 {
2195 return Err(IoError::Html("html table contains no cells".to_owned()));
2196 }
2197 let headers = (0..width).map(|idx| idx.to_string()).collect::<Vec<_>>();
2198 html_rows_to_frame(headers, data_rows)
2199 }
2200}
2201
// Keys and marker values of the pickle envelope produced by
// `write_pickle_bytes_with_options` and checked by
// `read_pickle_bytes_with_options`.

/// Envelope key holding the format marker.
const PICKLE_FORMAT_KEY: &str = "__frankenpandas_pickle_format";
/// The only format marker the reader accepts.
const PICKLE_FORMAT_VERSION: &str = "frankenpandas.dataframe.v1";
/// Envelope key holding the JSON orient of the payload (always `"split"`).
const PICKLE_ORIENT_KEY: &str = "orient";
/// Envelope key holding the frame data itself.
const PICKLE_PAYLOAD_KEY: &str = "payload";
2206
2207pub fn write_pickle_bytes_with_options(
2213 frame: &DataFrame,
2214 options: &PickleWriteOptions,
2215) -> Result<Vec<u8>, IoError> {
2216 let split_json = write_json_string(frame, JsonOrient::Split)?;
2217 let split_value = serde_json::from_str::<serde_json::Value>(&split_json)?;
2218 let mut envelope = serde_json::Map::new();
2219 envelope.insert(
2220 PICKLE_FORMAT_KEY.to_owned(),
2221 serde_json::Value::String(PICKLE_FORMAT_VERSION.to_owned()),
2222 );
2223 envelope.insert(
2224 PICKLE_ORIENT_KEY.to_owned(),
2225 serde_json::Value::String("split".to_owned()),
2226 );
2227 envelope.insert(PICKLE_PAYLOAD_KEY.to_owned(), split_value);
2228
2229 serde_pickle::to_vec(
2230 &serde_json::Value::Object(envelope),
2231 pickle_ser_options(options),
2232 )
2233 .map_err(|err| IoError::Pickle(err.to_string()))
2234}
2235
2236pub fn read_pickle_bytes(input: &[u8]) -> Result<DataFrame, IoError> {
2238 read_pickle_bytes_with_options(input, &PickleReadOptions::default())
2239}
2240
2241pub fn read_pickle_bytes_with_options(
2246 input: &[u8],
2247 options: &PickleReadOptions,
2248) -> Result<DataFrame, IoError> {
2249 let value = serde_pickle::from_slice::<serde_json::Value>(input, pickle_de_options(options))
2250 .map_err(|err| IoError::Pickle(err.to_string()))?;
2251 let envelope = value
2252 .as_object()
2253 .ok_or_else(|| IoError::Pickle("pickle payload must be an object".to_owned()))?;
2254
2255 match envelope
2256 .get(PICKLE_FORMAT_KEY)
2257 .and_then(|value| value.as_str())
2258 {
2259 Some(PICKLE_FORMAT_VERSION) => {}
2260 Some(other) => {
2261 return Err(IoError::Pickle(format!(
2262 "unsupported FrankenPandas pickle format '{other}'"
2263 )));
2264 }
2265 None => {
2266 return Err(IoError::Pickle(
2267 "pickle payload is missing FrankenPandas format marker".to_owned(),
2268 ));
2269 }
2270 }
2271
2272 match envelope
2273 .get(PICKLE_ORIENT_KEY)
2274 .and_then(|value| value.as_str())
2275 {
2276 Some("split") => {}
2277 Some(other) => {
2278 return Err(IoError::Pickle(format!(
2279 "unsupported FrankenPandas pickle orient '{other}'"
2280 )));
2281 }
2282 None => {
2283 return Err(IoError::Pickle(
2284 "pickle payload is missing orient".to_owned(),
2285 ));
2286 }
2287 }
2288
2289 let payload = envelope
2290 .get(PICKLE_PAYLOAD_KEY)
2291 .ok_or_else(|| IoError::Pickle("pickle payload is missing data".to_owned()))?;
2292 let payload_json = serde_json::to_string(payload)?;
2293 read_json_str(&payload_json, JsonOrient::Split)
2294}
2295
2296fn pickle_ser_options(options: &PickleWriteOptions) -> serde_pickle::SerOptions {
2297 match options.protocol {
2298 PickleProtocol::V2 => serde_pickle::SerOptions::new().proto_v2(),
2299 PickleProtocol::V3 => serde_pickle::SerOptions::new(),
2300 }
2301}
2302
2303fn pickle_de_options(options: &PickleReadOptions) -> serde_pickle::DeOptions {
2304 let de_options = serde_pickle::DeOptions::new();
2305 if options.decode_legacy_strings {
2306 de_options.decode_strings()
2307 } else {
2308 de_options
2309 }
2310}
2311
/// One variable of the Stata file being written, as planned by
/// `stata_fields_for_frame`.
#[derive(Debug, Clone)]
struct StataField {
    /// Name of the variable in the Stata schema.
    variable_name: String,
    /// Where the variable's values come from (the index or a named column).
    source: StataFieldSource,
    /// Stata storage type chosen for the variable.
    variable_type: VariableType,
}
2318
/// Origin of a Stata variable's values within the frame.
#[derive(Debug, Clone)]
enum StataFieldSource {
    /// Values are the frame's index labels, written as strings.
    Index,
    /// Values come from the frame column with this name.
    Column(String),
}
2324
2325pub fn write_stata_bytes_with_options(
2330 frame: &DataFrame,
2331 options: &StataWriteOptions,
2332) -> Result<Vec<u8>, IoError> {
2333 let fields = stata_fields_for_frame(frame, options)?;
2334 let header = Header::builder(Release::V118, ByteOrder::LittleEndian).build();
2335 let mut schema = StataSchema::builder();
2336 for field in &fields {
2337 let format = stata_format_for_type(field.variable_type);
2338 schema = schema.add_variable(
2339 Variable::builder(field.variable_type, &field.variable_name).format(format),
2340 );
2341 }
2342 let schema = schema.build().map_err(stata_error)?;
2343
2344 let mut record_writer = DtaWriter::new()
2345 .from_writer(Cursor::new(Vec::<u8>::new()))
2346 .write_header(header)
2347 .map_err(stata_error)?
2348 .write_schema(schema)
2349 .map_err(stata_error)?
2350 .into_record_writer()
2351 .map_err(stata_error)?;
2352
2353 for row_idx in 0..frame.index().len() {
2354 let mut record = Vec::with_capacity(fields.len());
2355 for field in &fields {
2356 record.push(stata_value_for_field(frame, row_idx, field)?);
2357 }
2358 record_writer.write_record(&record).map_err(stata_error)?;
2359 }
2360
2361 Ok(record_writer
2362 .into_long_string_writer()
2363 .map_err(stata_error)?
2364 .into_value_label_writer()
2365 .map_err(stata_error)?
2366 .finish()
2367 .map_err(stata_error)?
2368 .into_inner())
2369}
2370
2371pub fn read_stata_bytes(input: &[u8]) -> Result<DataFrame, IoError> {
2373 let mut characteristic_reader = DtaReader::new()
2374 .from_reader(Cursor::new(input))
2375 .read_header()
2376 .map_err(stata_error)?
2377 .read_schema()
2378 .map_err(stata_error)?;
2379 characteristic_reader.skip_to_end().map_err(stata_error)?;
2380
2381 let mut record_reader = characteristic_reader
2382 .into_record_reader()
2383 .map_err(stata_error)?;
2384 let column_order = record_reader
2385 .schema()
2386 .variables()
2387 .iter()
2388 .map(|variable| variable.name().to_owned())
2389 .collect::<Vec<_>>();
2390 reject_duplicate_headers(&column_order)?;
2391
2392 let mut columns = column_order
2393 .iter()
2394 .cloned()
2395 .map(|name| (name, Vec::new()))
2396 .collect::<BTreeMap<_, _>>();
2397 let mut row_count: i64 = 0;
2398 while let Some(record) = record_reader.read_record().map_err(stata_error)? {
2399 for (name, value) in column_order.iter().zip(record.values()) {
2400 columns
2401 .get_mut(name)
2402 .ok_or_else(|| IoError::Stata(format!("missing Stata column '{name}'")))?
2403 .push(stata_value_to_scalar(value)?);
2404 }
2405 row_count = row_count
2406 .checked_add(1)
2407 .ok_or_else(|| IoError::Stata("Stata row count exceeded i64 range".to_owned()))?;
2408 }
2409
2410 let mut out = BTreeMap::new();
2411 for name in &column_order {
2412 let values = columns
2413 .remove(name)
2414 .ok_or_else(|| IoError::Stata(format!("missing Stata column '{name}'")))?;
2415 out.insert(name.clone(), Column::from_values(values)?);
2416 }
2417 Ok(DataFrame::new_with_column_order(
2418 Index::from_i64((0..row_count).collect()),
2419 out,
2420 column_order,
2421 )?)
2422}
2423
2424fn stata_fields_for_frame(
2425 frame: &DataFrame,
2426 options: &StataWriteOptions,
2427) -> Result<Vec<StataField>, IoError> {
2428 let mut fields = Vec::new();
2429 if options.include_index {
2430 let name = options
2431 .index_label
2432 .clone()
2433 .unwrap_or_else(|| "index".to_owned());
2434 validate_stata_variable_name(&name)?;
2435 fields.push(StataField {
2436 variable_name: name,
2437 source: StataFieldSource::Index,
2438 variable_type: stata_index_variable_type(frame)?,
2439 });
2440 }
2441
2442 for name in frame.column_names() {
2443 validate_stata_variable_name(name)?;
2444 let column = frame
2445 .column(name)
2446 .ok_or_else(|| IoError::Stata(format!("missing DataFrame column '{name}'")))?;
2447 fields.push(StataField {
2448 variable_name: name.clone(),
2449 source: StataFieldSource::Column(name.clone()),
2450 variable_type: infer_stata_variable_type(column, name)?,
2451 });
2452 }
2453
2454 let mut seen = BTreeSet::new();
2455 for field in &fields {
2456 if !seen.insert(field.variable_name.clone()) {
2457 return Err(IoError::DuplicateColumnName(field.variable_name.clone()));
2458 }
2459 }
2460 Ok(fields)
2461}
2462
2463fn validate_stata_variable_name(name: &str) -> Result<(), IoError> {
2464 if name.is_empty() {
2465 return Err(IoError::Stata(
2466 "Stata variable name cannot be empty".to_owned(),
2467 ));
2468 }
2469 if name.len() > 32 {
2470 return Err(IoError::Stata(format!(
2471 "Stata variable name '{name}' exceeds 32 bytes"
2472 )));
2473 }
2474 let mut chars = name.chars();
2475 let first = chars
2476 .next()
2477 .ok_or_else(|| IoError::Stata("Stata variable name cannot be empty".to_owned()))?;
2478 if !(first == '_' || first.is_ascii_alphabetic()) {
2479 return Err(IoError::Stata(format!(
2480 "invalid Stata variable name '{name}': first character must be ASCII letter or '_'"
2481 )));
2482 }
2483 if !chars.all(|ch| ch == '_' || ch.is_ascii_alphanumeric()) {
2484 return Err(IoError::Stata(format!(
2485 "invalid Stata variable name '{name}': only ASCII letters, digits, and '_' are supported"
2486 )));
2487 }
2488 Ok(())
2489}
2490
2491fn stata_index_variable_type(frame: &DataFrame) -> Result<VariableType, IoError> {
2492 let max_len = frame
2493 .index()
2494 .labels()
2495 .iter()
2496 .map(|label| label.to_string().len())
2497 .max()
2498 .unwrap_or(1)
2499 .max(1);
2500 stata_fixed_string_type(max_len, "index")
2501}
2502
2503fn infer_stata_variable_type(column: &Column, name: &str) -> Result<VariableType, IoError> {
2504 let mut saw_numeric = false;
2505 let mut saw_float = false;
2506 let mut saw_string = false;
2507 let mut max_string_len = 1usize;
2508
2509 for value in column.values() {
2510 match value {
2511 Scalar::Null(_) => {}
2512 Scalar::Bool(_) => {
2513 saw_numeric = true;
2514 }
2515 Scalar::Int64(v) => {
2516 saw_numeric = true;
2517 if i32::try_from(*v).is_err() {
2518 return Err(IoError::Stata(format!(
2519 "Stata long column '{name}' cannot encode i64 value {v}"
2520 )));
2521 }
2522 }
2523 Scalar::Float64(v) => {
2524 if !v.is_nan() {
2525 saw_numeric = true;
2526 saw_float = true;
2527 }
2528 }
2529 Scalar::Utf8(text) => {
2530 saw_string = true;
2531 max_string_len = max_string_len.max(text.len());
2532 }
2533 other => {
2534 saw_string = true;
2535 max_string_len = max_string_len.max(scalar_to_table_with_na(other, "").len());
2536 }
2537 }
2538 }
2539
2540 if saw_string {
2541 stata_fixed_string_type(max_string_len, name)
2542 } else if saw_numeric && !saw_float {
2543 Ok(VariableType::Long)
2544 } else {
2545 Ok(VariableType::Double)
2546 }
2547}
2548
2549fn stata_fixed_string_type(len: usize, name: &str) -> Result<VariableType, IoError> {
2550 let width = len.max(1);
2551 let width = u16::try_from(width).map_err(|_| {
2552 IoError::Stata(format!(
2553 "Stata string column '{name}' exceeds fixed string capacity"
2554 ))
2555 })?;
2556 if width > 2045 {
2557 return Err(IoError::Stata(format!(
2558 "Stata string column '{name}' requires strL; this slice supports fixed strings only"
2559 )));
2560 }
2561 Ok(VariableType::FixedString(width))
2562}
2563
2564fn stata_format_for_type(variable_type: VariableType) -> &'static str {
2565 match variable_type {
2566 VariableType::Byte | VariableType::Int | VariableType::Long => "%12.0g",
2567 VariableType::Float | VariableType::Double => "%10.0g",
2568 VariableType::FixedString(_) | VariableType::LongString => "%9s",
2569 }
2570}
2571
2572fn stata_value_for_field(
2573 frame: &DataFrame,
2574 row_idx: usize,
2575 field: &StataField,
2576) -> Result<StataValue<'static>, IoError> {
2577 match field.source {
2578 StataFieldSource::Index => Ok(StataValue::String(std::borrow::Cow::Owned(
2579 index_label_string(frame, row_idx)?,
2580 ))),
2581 StataFieldSource::Column(ref name) => {
2582 let value = frame.column(name).and_then(|column| column.value(row_idx));
2583 scalar_to_stata_value(value, field.variable_type, name)
2584 }
2585 }
2586}
2587
2588fn scalar_to_stata_value(
2589 value: Option<&Scalar>,
2590 variable_type: VariableType,
2591 name: &str,
2592) -> Result<StataValue<'static>, IoError> {
2593 match variable_type {
2594 VariableType::Long => match value {
2595 Some(Scalar::Bool(v)) => Ok(StataValue::Long(StataLong::Present(i32::from(*v)))),
2596 Some(Scalar::Int64(v)) => Ok(StataValue::Long(StataLong::Present(
2597 i32::try_from(*v).map_err(|_| {
2598 IoError::Stata(format!("Stata long column '{name}' cannot encode {v}"))
2599 })?,
2600 ))),
2601 Some(Scalar::Null(_)) | None => {
2602 Ok(StataValue::Long(StataLong::Missing(MissingValue::System)))
2603 }
2604 Some(other) => Err(IoError::Stata(format!(
2605 "Stata long column '{name}' cannot encode {other:?}"
2606 ))),
2607 },
2608 VariableType::Double => match value {
2609 Some(Scalar::Bool(v)) => Ok(StataValue::Double(StataDouble::Present(if *v {
2610 1.0
2611 } else {
2612 0.0
2613 }))),
2614 Some(Scalar::Int64(v)) => Ok(StataValue::Double(StataDouble::Present(*v as f64))),
2615 Some(Scalar::Float64(v)) if v.is_nan() => Ok(StataValue::Double(StataDouble::Missing(
2616 MissingValue::System,
2617 ))),
2618 Some(Scalar::Float64(v)) => Ok(StataValue::Double(StataDouble::Present(*v))),
2619 Some(Scalar::Null(_)) | None => Ok(StataValue::Double(StataDouble::Missing(
2620 MissingValue::System,
2621 ))),
2622 Some(other) => Err(IoError::Stata(format!(
2623 "Stata double column '{name}' cannot encode {other:?}"
2624 ))),
2625 },
2626 VariableType::FixedString(_) => {
2627 let text = match value {
2628 Some(Scalar::Null(_)) | None => String::new(),
2629 Some(scalar) => scalar_to_table_with_na(scalar, ""),
2630 };
2631 Ok(StataValue::String(std::borrow::Cow::Owned(text)))
2632 }
2633 VariableType::Byte | VariableType::Int | VariableType::Float | VariableType::LongString => {
2634 Err(IoError::Stata(format!(
2635 "unsupported Stata variable type for column '{name}': {variable_type:?}"
2636 )))
2637 }
2638 }
2639}
2640
2641fn stata_value_to_scalar(value: &StataValue<'_>) -> Result<Scalar, IoError> {
2642 match value {
2643 StataValue::Byte(v) => Ok(v
2644 .present()
2645 .map(|value| Scalar::Int64(i64::from(value)))
2646 .unwrap_or(Scalar::Null(NullKind::NaN))),
2647 StataValue::Int(v) => Ok(v
2648 .present()
2649 .map(|value| Scalar::Int64(i64::from(value)))
2650 .unwrap_or(Scalar::Null(NullKind::NaN))),
2651 StataValue::Long(v) => Ok(v
2652 .present()
2653 .map(|value| Scalar::Int64(i64::from(value)))
2654 .unwrap_or(Scalar::Null(NullKind::NaN))),
2655 StataValue::Float(v) => Ok(v
2656 .present()
2657 .map(|value| Scalar::Float64(f64::from(value)))
2658 .unwrap_or(Scalar::Null(NullKind::NaN))),
2659 StataValue::Double(v) => Ok(v
2660 .present()
2661 .map(Scalar::Float64)
2662 .unwrap_or(Scalar::Null(NullKind::NaN))),
2663 StataValue::String(text) => Ok(Scalar::Utf8(text.to_string())),
2664 StataValue::LongStringRef(_) => Err(IoError::Stata(
2665 "Stata strL values are not supported by this reader slice".to_owned(),
2666 )),
2667 }
2668}
2669
2670fn stata_error<E: std::fmt::Display>(err: E) -> IoError {
2671 IoError::Stata(err.to_string())
2672}
2673
2674pub fn read_xml_str(input: &str) -> Result<DataFrame, IoError> {
2682 read_xml_str_with_options(input, &XmlReadOptions::default())
2683}
2684
2685pub fn read_xml_str_with_options(
2687 input: &str,
2688 options: &XmlReadOptions,
2689) -> Result<DataFrame, IoError> {
2690 validate_xml_element_name(&options.row_name)?;
2691
2692 let mut reader = XmlReader::from_str(input);
2693 reader.config_mut().trim_text(false);
2694 let mut buf = Vec::new();
2695 let mut rows: Vec<BTreeMap<String, Scalar>> = Vec::new();
2696 let mut column_order = Vec::new();
2697 let mut seen_columns = HashSet::new();
2698 let mut current_row: Option<BTreeMap<String, Scalar>> = None;
2699 let mut current_field: Option<String> = None;
2700 let mut field_text = String::new();
2701 let mut xml_version = XmlVersion::Implicit1_0;
2702
2703 loop {
2704 match reader
2705 .read_event_into(&mut buf)
2706 .map_err(|err| IoError::Xml(err.to_string()))?
2707 {
2708 Event::Start(event) => {
2709 let name = xml_event_name(event.name())?;
2710 if current_row.is_none() {
2711 if name == options.row_name {
2712 current_row = Some(BTreeMap::new());
2713 }
2714 } else if let Some(field_name) = ¤t_field {
2715 return Err(IoError::Xml(format!(
2716 "nested xml element '{name}' inside field '{field_name}' is unsupported"
2717 )));
2718 } else {
2719 current_field = Some(name);
2720 field_text.clear();
2721 }
2722 }
2723 Event::Empty(event) => {
2724 let name = xml_event_name(event.name())?;
2725 if let Some(field_name) = ¤t_field {
2726 return Err(IoError::Xml(format!(
2727 "nested xml element '{name}' inside field '{field_name}' is unsupported"
2728 )));
2729 }
2730 if let Some(row) = current_row.as_mut() {
2731 insert_xml_field(
2732 row,
2733 &mut column_order,
2734 &mut seen_columns,
2735 name,
2736 Scalar::Null(NullKind::Null),
2737 )?;
2738 } else if name == options.row_name {
2739 rows.push(BTreeMap::new());
2740 }
2741 }
2742 Event::Text(event) => {
2743 if current_field.is_some() {
2744 let decoded = event
2745 .xml_content(xml_version)
2746 .map_err(|err| IoError::Xml(err.to_string()))?;
2747 field_text.push_str(&decoded);
2748 }
2749 }
2750 Event::CData(event) => {
2751 if current_field.is_some() {
2752 let decoded = event
2753 .xml_content(xml_version)
2754 .map_err(|err| IoError::Xml(err.to_string()))?;
2755 field_text.push_str(&decoded);
2756 }
2757 }
2758 Event::End(event) => {
2759 let name = xml_event_name(event.name())?;
2760 if let Some(field_name) = current_field.as_ref() {
2761 if name != *field_name {
2762 return Err(IoError::Xml(format!(
2763 "xml field '{field_name}' closed by mismatched element '{name}'"
2764 )));
2765 }
2766 let field_name = current_field.take().expect("field checked");
2767 let value = parse_scalar(&field_text);
2768 field_text.clear();
2769 let row = current_row
2770 .as_mut()
2771 .ok_or_else(|| IoError::Xml("xml field outside row".to_owned()))?;
2772 insert_xml_field(row, &mut column_order, &mut seen_columns, field_name, value)?;
2773 } else if name == options.row_name {
2774 let row = current_row.take().ok_or_else(|| {
2775 IoError::Xml("xml row closed before it opened".to_owned())
2776 })?;
2777 rows.push(row);
2778 }
2779 }
2780 Event::GeneralRef(reference) => {
2781 if current_field.is_some() {
2782 field_text.push_str(&decode_xml_general_ref(reference)?);
2783 }
2784 }
2785 Event::Eof => break,
2786 Event::Decl(decl) => {
2787 if let Ok(v) = decl.version() {
2788 xml_version = match v.as_ref() {
2789 b"1.0" => XmlVersion::Explicit1_0,
2790 b"1.1" => XmlVersion::Explicit1_1,
2791 _ => xml_version,
2792 };
2793 }
2794 }
2795 Event::PI(_) | Event::DocType(_) | Event::Comment(_) => {}
2796 }
2797 buf.clear();
2798 }
2799
2800 if current_field.is_some() || current_row.is_some() {
2801 return Err(IoError::Xml(
2802 "xml document ended inside an open row or field".to_owned(),
2803 ));
2804 }
2805 if rows.is_empty() {
2806 return Err(IoError::Xml(
2807 "xml input contains no row elements".to_owned(),
2808 ));
2809 }
2810
2811 let mut out_columns = BTreeMap::new();
2812 for name in &column_order {
2813 let values = rows
2814 .iter()
2815 .map(|row| {
2816 row.get(name)
2817 .cloned()
2818 .unwrap_or(Scalar::Null(NullKind::Null))
2819 })
2820 .collect::<Vec<_>>();
2821 out_columns.insert(name.clone(), Column::from_values(values)?);
2822 }
2823 let index = Index::from_i64((0..rows.len() as i64).collect());
2824 Ok(DataFrame::new_with_column_order(
2825 index,
2826 out_columns,
2827 column_order,
2828 )?)
2829}
2830
2831pub fn write_xml_string_with_options(
2833 frame: &DataFrame,
2834 options: &XmlWriteOptions,
2835) -> Result<String, IoError> {
2836 if options.include_index && frame.row_multiindex().is_some() {
2837 let materialized = materialize_named_row_multiindex_columns(frame)?;
2838 let mut nested_options = options.clone();
2839 nested_options.include_index = false;
2840 nested_options.index_label = None;
2841 return write_xml_string_with_options(&materialized, &nested_options);
2842 }
2843
2844 validate_xml_element_name(&options.root_name)?;
2845 validate_xml_element_name(&options.row_name)?;
2846
2847 let headers = frame
2848 .column_names()
2849 .into_iter()
2850 .cloned()
2851 .collect::<Vec<_>>();
2852 for name in &headers {
2853 validate_xml_element_name(name)?;
2854 }
2855
2856 let index_label = options
2857 .index_label
2858 .clone()
2859 .or_else(|| frame.index().name().map(ToOwned::to_owned))
2860 .unwrap_or_else(|| "index".to_owned());
2861 if options.include_index {
2862 validate_xml_element_name(&index_label)?;
2863 }
2864
2865 let mut out = String::new();
2866 out.push_str("<?xml version=\"1.0\" encoding=\"utf-8\"?>\n");
2867 out.push('<');
2868 out.push_str(&options.root_name);
2869 out.push_str(">\n");
2870
2871 for row_idx in 0..frame.index().len() {
2872 out.push_str(" <");
2873 out.push_str(&options.row_name);
2874 out.push_str(">\n");
2875
2876 if options.include_index {
2877 let value = index_label_string(frame, row_idx)?;
2878 push_xml_field(&mut out, &index_label, Some(&value));
2879 }
2880
2881 for name in &headers {
2882 let value = frame
2883 .column(name)
2884 .and_then(|column| column.value(row_idx))
2885 .and_then(scalar_to_xml_value);
2886 push_xml_field(&mut out, name, value.as_deref());
2887 }
2888
2889 out.push_str(" </");
2890 out.push_str(&options.row_name);
2891 out.push_str(">\n");
2892 }
2893
2894 out.push_str("</");
2895 out.push_str(&options.root_name);
2896 out.push_str(">\n");
2897 Ok(out)
2898}
2899
2900fn xml_event_name(name: quick_xml::name::QName<'_>) -> Result<String, IoError> {
2901 std::str::from_utf8(name.as_ref())
2902 .map(ToOwned::to_owned)
2903 .map_err(|err| IoError::Xml(format!("invalid utf-8 xml element name: {err}")))
2904}
2905
2906fn decode_xml_general_ref(reference: quick_xml::events::BytesRef<'_>) -> Result<String, IoError> {
2907 let raw = std::str::from_utf8(reference.as_ref())
2908 .map_err(|err| IoError::Xml(format!("invalid utf-8 xml entity reference: {err}")))?;
2909 match raw {
2910 "amp" => Ok("&".to_owned()),
2911 "lt" => Ok("<".to_owned()),
2912 "gt" => Ok(">".to_owned()),
2913 "quot" => Ok("\"".to_owned()),
2914 "apos" => Ok("'".to_owned()),
2915 _ if raw.starts_with("#x") => {
2916 let value = u32::from_str_radix(&raw[2..], 16)
2917 .map_err(|err| IoError::Xml(format!("invalid hex xml entity '&{raw};': {err}")))?;
2918 char::from_u32(value)
2919 .map(|ch| ch.to_string())
2920 .ok_or_else(|| IoError::Xml(format!("invalid unicode xml entity '&{raw};'")))
2921 }
2922 _ if raw.starts_with('#') => {
2923 let value = raw[1..].parse::<u32>().map_err(|err| {
2924 IoError::Xml(format!("invalid decimal xml entity '&{raw};': {err}"))
2925 })?;
2926 char::from_u32(value)
2927 .map(|ch| ch.to_string())
2928 .ok_or_else(|| IoError::Xml(format!("invalid unicode xml entity '&{raw};'")))
2929 }
2930 _ => Err(IoError::Xml(format!(
2931 "unsupported xml entity reference '&{raw};'"
2932 ))),
2933 }
2934}
2935
2936fn insert_xml_field(
2937 row: &mut BTreeMap<String, Scalar>,
2938 column_order: &mut Vec<String>,
2939 seen_columns: &mut HashSet<String>,
2940 name: String,
2941 value: Scalar,
2942) -> Result<(), IoError> {
2943 if row.insert(name.clone(), value).is_some() {
2944 return Err(IoError::Xml(format!("duplicate xml field '{name}' in row")));
2945 }
2946 if seen_columns.insert(name.clone()) {
2947 column_order.push(name);
2948 }
2949 Ok(())
2950}
2951
2952fn validate_xml_element_name(name: &str) -> Result<(), IoError> {
2953 let mut chars = name.chars();
2954 let Some(first) = chars.next() else {
2955 return Err(IoError::Xml(
2956 "xml element name must be non-empty".to_owned(),
2957 ));
2958 };
2959 let valid_first = first == '_' || first.is_ascii_alphabetic();
2960 let valid_rest =
2961 chars.all(|ch| ch == '_' || ch == '-' || ch == '.' || ch.is_ascii_alphanumeric());
2962 if valid_first && valid_rest {
2963 Ok(())
2964 } else {
2965 Err(IoError::Xml(format!("invalid xml element name '{name}'")))
2966 }
2967}
2968
2969fn push_xml_field(out: &mut String, name: &str, value: Option<&str>) {
2970 out.push_str(" <");
2971 out.push_str(name);
2972 match value {
2973 Some(value) => {
2974 out.push('>');
2975 out.push_str(&escape_xml_text(value));
2976 out.push_str("</");
2977 out.push_str(name);
2978 out.push_str(">\n");
2979 }
2980 None => out.push_str("/>\n"),
2981 }
2982}
2983
/// Escapes `value` for use as XML element text.
///
/// `&`, `<` and `>` are replaced by the predefined entities `&amp;`, `&lt;`
/// and `&gt;` so the text can never be mistaken for markup. A carriage return,
/// alone or followed by a line feed, is written as a single line feed.
fn escape_xml_text(value: &str) -> String {
    let mut escaped = String::with_capacity(value.len());
    let mut chars = value.chars().peekable();
    while let Some(ch) = chars.next() {
        match ch {
            '&' => escaped.push_str("&amp;"),
            '<' => escaped.push_str("&lt;"),
            '>' => escaped.push_str("&gt;"),
            '\r' => {
                escaped.push('\n');
                // Swallow the LF of a CRLF pair so it yields one newline.
                if chars.peek() == Some(&'\n') {
                    chars.next();
                }
            }
            _ => escaped.push(ch),
        }
    }
    escaped
}
3003
3004fn scalar_to_xml_value(scalar: &Scalar) -> Option<String> {
3005 match scalar {
3006 Scalar::Null(_) => None,
3007 Scalar::Bool(value) => Some(if *value { "True" } else { "False" }.to_owned()),
3008 Scalar::Int64(value) => Some(value.to_string()),
3009 Scalar::Float64(value) => {
3010 if value.is_nan() {
3011 None
3012 } else if value.is_finite() && *value == value.round() && value.abs() < 1e15 {
3013 Some(format!("{value:.1}"))
3014 } else {
3015 Some(value.to_string())
3016 }
3017 }
3018 Scalar::Utf8(value) => Some(value.clone()),
3019 Scalar::Timedelta64(value) => {
3020 if *value == Timedelta::NAT {
3021 None
3022 } else {
3023 Some(Timedelta::format(*value))
3024 }
3025 }
3026 Scalar::Datetime64(value) => {
3027 if *value == Timestamp::NAT {
3028 None
3029 } else {
3030 Some(format_datetime_ns(*value))
3031 }
3032 }
3033 Scalar::Period(value) => {
3034 if *value == i64::MIN {
3035 None
3036 } else {
3037 Some(format!("Period[{value}]"))
3038 }
3039 }
3040 Scalar::Interval(iv) => Some(format!("{iv}")),
3041 }
3042}
3043
3044fn html_selector(pattern: &str) -> Result<Selector, IoError> {
3045 Selector::parse(pattern).map_err(|err| {
3046 IoError::Html(format!(
3047 "invalid built-in html selector {pattern:?}: {err:?}"
3048 ))
3049 })
3050}
3051
3052fn html_row_cells(row: ElementRef<'_>, cell_selector: &Selector) -> Vec<String> {
3053 row.select(cell_selector)
3054 .map(|cell| cell.text().collect::<String>().trim().to_owned())
3055 .collect()
3056}
3057
3058fn normalize_html_headers(raw_headers: &[String]) -> Result<Vec<String>, IoError> {
3059 if raw_headers.is_empty() {
3060 return Err(IoError::Html(
3061 "html table header row contains no cells".to_owned(),
3062 ));
3063 }
3064
3065 let mut seen = HashSet::new();
3066 let mut headers = Vec::with_capacity(raw_headers.len());
3067 for (idx, raw) in raw_headers.iter().enumerate() {
3068 let name = if raw.trim().is_empty() {
3069 format!("Unnamed: {idx}")
3070 } else {
3071 raw.trim().to_owned()
3072 };
3073 if !seen.insert(name.clone()) {
3074 return Err(IoError::DuplicateColumnName(name));
3075 }
3076 headers.push(name);
3077 }
3078 Ok(headers)
3079}
3080
3081fn html_rows_to_frame(
3082 column_order: Vec<String>,
3083 rows: Vec<Vec<String>>,
3084) -> Result<DataFrame, IoError> {
3085 let width = column_order.len();
3086 if width == 0 {
3087 return Err(IoError::Html(
3088 "html table must contain at least one column".to_owned(),
3089 ));
3090 }
3091
3092 let mut values_by_column = column_order
3093 .iter()
3094 .map(|name| (name.clone(), Vec::with_capacity(rows.len())))
3095 .collect::<BTreeMap<_, _>>();
3096 for (row_idx, row) in rows.iter().enumerate() {
3097 if row.len() > width {
3098 return Err(IoError::Html(format!(
3099 "html row {row_idx} has {} cells but header has {width}",
3100 row.len()
3101 )));
3102 }
3103 for (col_idx, name) in column_order.iter().enumerate() {
3104 let value = row
3105 .get(col_idx)
3106 .map_or(Scalar::Null(NullKind::Null), |cell| parse_scalar(cell));
3107 let column_values = values_by_column.get_mut(name).ok_or_else(|| {
3108 IoError::Html(format!("html column '{name}' was not initialized"))
3109 })?;
3110 column_values.push(value);
3111 }
3112 }
3113
3114 let mut columns = BTreeMap::new();
3115 for name in &column_order {
3116 let values = values_by_column
3117 .remove(name)
3118 .ok_or_else(|| IoError::Html(format!("html column '{name}' has no values")))?;
3119 columns.insert(name.clone(), Column::from_values(values)?);
3120 }
3121 let row_count = i64::try_from(rows.len()).map_err(|_| {
3122 IoError::Html(format!(
3123 "html table row count {} exceeds supported i64 index range",
3124 rows.len()
3125 ))
3126 })?;
3127 Ok(DataFrame::new_with_column_order(
3128 Index::from_i64((0..row_count).collect()),
3129 columns,
3130 column_order,
3131 )?)
3132}
3133
3134fn resolve_csv_index_header(frame: &DataFrame, options: &CsvWriteOptions) -> String {
3135 options
3136 .index_label
3137 .clone()
3138 .or_else(|| frame.index().name().map(ToOwned::to_owned))
3139 .unwrap_or_default()
3140}
3141
3142fn resolve_table_index_header(frame: &DataFrame, index_label: Option<&str>) -> String {
3143 index_label
3144 .map(ToOwned::to_owned)
3145 .or_else(|| frame.index().name().map(ToOwned::to_owned))
3146 .unwrap_or_default()
3147}
3148
3149fn index_label_string(frame: &DataFrame, row_idx: usize) -> Result<String, IoError> {
3150 frame
3151 .index()
3152 .labels()
3153 .get(row_idx)
3154 .map(ToString::to_string)
3155 .ok_or_else(|| {
3156 IoError::Frame(FrameError::CompatibilityRejected(format!(
3157 "index position {row_idx} out of bounds for index length {}",
3158 frame.index().len()
3159 )))
3160 })
3161}
3162
3163fn index_datetime_csv_format(frame: &DataFrame) -> Option<DatetimeCsvFormat> {
3168 let labels = frame.index().labels();
3169 let mut any_datetime = false;
3170 let mut date_only = true;
3171 let mut subsec_digits = 0u8;
3172 for label in labels {
3173 let IndexLabel::Datetime64(ns) = label else {
3174 return None;
3175 };
3176 any_datetime = true;
3177 if *ns == i64::MIN {
3178 continue; }
3180 let subsec = (ns.rem_euclid(1_000_000_000)) as u32;
3181 if ns.div_euclid(1_000_000_000).rem_euclid(86_400) != 0 || subsec != 0 {
3182 date_only = false;
3183 }
3184 if subsec != 0 {
3185 let digits = if !subsec.is_multiple_of(1_000) {
3186 9
3187 } else if !subsec.is_multiple_of(1_000_000) {
3188 6
3189 } else {
3190 3
3191 };
3192 subsec_digits = subsec_digits.max(digits);
3193 }
3194 }
3195 any_datetime.then_some(DatetimeCsvFormat {
3196 date_only,
3197 subsec_digits,
3198 })
3199}
3200
3201fn index_label_csv_string(
3204 frame: &DataFrame,
3205 row_idx: usize,
3206 dt_format: Option<DatetimeCsvFormat>,
3207) -> Result<String, IoError> {
3208 if let Some(fmt) = dt_format
3209 && let Some(IndexLabel::Datetime64(ns)) = frame.index().labels().get(row_idx)
3210 && *ns != i64::MIN
3211 {
3212 return Ok(format_datetime_csv(*ns, fmt));
3213 }
3214 index_label_string(frame, row_idx)
3215}
3216
/// Shared rendering choice for all datetimes of one CSV column or index.
///
/// Computed by `datetime_csv_format` / `index_datetime_csv_format` and
/// consumed by `format_datetime_csv`.
#[derive(Clone, Copy)]
struct DatetimeCsvFormat {
    /// True when every non-missing value falls exactly on midnight, in which
    /// case only the first ten characters of the rendering are written.
    date_only: bool,
    /// Number of fractional-second digits to write: 0, 3, 6 or 9.
    subsec_digits: u8,
}
3228
3229fn datetime_csv_format(column: &Column) -> DatetimeCsvFormat {
3231 let mut date_only = true;
3232 let mut subsec_digits = 0u8;
3233 for value in column.values() {
3234 let Scalar::Datetime64(ns) = value else {
3235 continue;
3236 };
3237 if *ns == Timestamp::NAT {
3238 continue;
3239 }
3240 let subsec = (ns.rem_euclid(1_000_000_000)) as u32;
3241 if ns.div_euclid(1_000_000_000).rem_euclid(86_400) != 0 || subsec != 0 {
3244 date_only = false;
3245 }
3246 if subsec != 0 {
3247 let digits = if !subsec.is_multiple_of(1_000) {
3248 9
3249 } else if !subsec.is_multiple_of(1_000_000) {
3250 6
3251 } else {
3252 3
3253 };
3254 subsec_digits = subsec_digits.max(digits);
3255 }
3256 }
3257 DatetimeCsvFormat {
3258 date_only,
3259 subsec_digits,
3260 }
3261}
3262
3263fn format_datetime_csv(nanos: i64, fmt: DatetimeCsvFormat) -> String {
3265 let base = format_datetime_ns(nanos);
3268 if fmt.date_only {
3269 return base[..10].to_owned();
3270 }
3271 if fmt.subsec_digits == 0 {
3272 return base;
3273 }
3274 let subsec = (nanos.rem_euclid(1_000_000_000)) as u32;
3275 let frac = subsec / 10u32.pow(9 - u32::from(fmt.subsec_digits));
3276 format!(
3277 "{base}.{frac:0>width$}",
3278 width = usize::from(fmt.subsec_digits)
3279 )
3280}
3281
3282fn scalar_to_csv_with_na(scalar: &Scalar, na_rep: &str) -> String {
3283 match scalar {
3284 Scalar::Null(_) => na_rep.to_owned(),
3285 Scalar::Float64(v) if v.is_nan() => na_rep.to_owned(),
3286 Scalar::Float64(v) => format_pandas_float(*v),
3291 Scalar::Timedelta64(v) if *v == Timedelta::NAT => na_rep.to_owned(),
3292 other => scalar_to_csv(other),
3293 }
3294}
3295
3296fn scalar_to_csv_cell(
3299 scalar: &Scalar,
3300 na_rep: &str,
3301 dt_format: Option<DatetimeCsvFormat>,
3302) -> String {
3303 if let (Scalar::Datetime64(ns), Some(fmt)) = (scalar, dt_format) {
3304 if *ns == Timestamp::NAT {
3305 return na_rep.to_owned();
3306 }
3307 return format_datetime_csv(*ns, fmt);
3308 }
3309 scalar_to_csv_with_na(scalar, na_rep)
3310}
3311
3312fn scalar_to_table_with_na(scalar: &Scalar, na_rep: &str) -> String {
3313 match scalar {
3314 Scalar::Null(_) => na_rep.to_owned(),
3315 Scalar::Float64(v) if v.is_nan() => na_rep.to_owned(),
3316 Scalar::Timedelta64(v) if *v == Timedelta::NAT => na_rep.to_owned(),
3317 other => scalar_to_csv(other),
3318 }
3319}
3320
3321fn scalar_to_latex_cell(scalar: &Scalar, na_rep: &str) -> String {
3326 match scalar {
3327 Scalar::Float64(v) if v.is_nan() => na_rep.to_owned(),
3328 Scalar::Float64(v) => format!("{v:.6}"),
3331 other => scalar_to_table_with_na(other, na_rep),
3332 }
3333}
3334
3335fn push_markdown_row(out: &mut String, cells: &[String]) {
3336 out.push('|');
3337 for cell in cells {
3338 out.push(' ');
3339 out.push_str(&escape_markdown_table_cell(cell));
3340 out.push_str(" |");
3341 }
3342 out.push('\n');
3343}
3344
/// Escapes text for a Markdown table cell: backslashes and pipes are
/// backslash-escaped, and line breaks become spaces so the row stays on one
/// line.
fn escape_markdown_table_cell(value: &str) -> String {
    let mut escaped = String::with_capacity(value.len());
    for ch in value.chars() {
        let replacement = match ch {
            '\\' => "\\\\",
            '|' => "\\|",
            '\n' | '\r' => " ",
            _ => {
                escaped.push(ch);
                continue;
            }
        };
        escaped.push_str(replacement);
    }
    escaped
}
3357
3358fn push_latex_row(out: &mut String, cells: &[String], escape: bool) {
3359 for (idx, cell) in cells.iter().enumerate() {
3360 if idx > 0 {
3361 out.push_str(" & ");
3362 }
3363 if escape {
3364 out.push_str(&escape_latex_table_cell(cell));
3365 } else {
3366 out.push_str(cell);
3367 }
3368 }
3369 out.push_str(" \\\\\n");
3370}
3371
/// Escapes text for a LaTeX table cell: the special characters
/// `& % $ # _ { }` are backslash-escaped, `~`, `^` and `\` use their
/// `\text…{}` commands, and line breaks become spaces.
fn escape_latex_table_cell(value: &str) -> String {
    let mut escaped = String::with_capacity(value.len());
    for ch in value.chars() {
        let replacement = match ch {
            '&' => "\\&",
            '%' => "\\%",
            '$' => "\\$",
            '#' => "\\#",
            '_' => "\\_",
            '{' => "\\{",
            '}' => "\\}",
            '~' => "\\textasciitilde{}",
            '^' => "\\textasciicircum{}",
            '\\' => "\\textbackslash{}",
            '\n' | '\r' => " ",
            _ => {
                escaped.push(ch);
                continue;
            }
        };
        escaped.push_str(replacement);
    }
    escaped
}
3392
/// Reports whether `s` is exactly one of the built-in missing-value tokens
/// recognized by this module. The comparison is case-sensitive and does not
/// trim whitespace.
fn is_pandas_default_na(s: &str) -> bool {
    const DEFAULT_NA_TOKENS: &[&str] = &[
        "", "#N/A", "#N/A N/A", "#NA", "-1.#IND", "-1.#QNAN", "-NaN", "-nan", "1.#IND",
        "1.#QNAN", "<NA>", "N/A", "NA", "NULL", "NaN", "None", "n/a", "nan", "null",
    ];
    DEFAULT_NA_TOKENS.contains(&s)
}
3418
3419fn parse_scalar(field: &str) -> Scalar {
3420 if is_pandas_default_na(field) {
3426 return Scalar::Null(NullKind::Null);
3427 }
3428
3429 let trimmed = field.trim();
3430 if let Ok(value) = trimmed.parse::<i64>() {
3431 return Scalar::Int64(value);
3432 }
3433 if let Ok(value) = trimmed.parse::<f64>() {
3434 return Scalar::Float64(value);
3435 }
3436 if field.eq_ignore_ascii_case("true") {
3437 return Scalar::Bool(true);
3438 }
3439 if field.eq_ignore_ascii_case("false") {
3440 return Scalar::Bool(false);
3441 }
3442
3443 Scalar::Utf8(field.to_owned())
3444}
3445
/// Formats a float from its `Debug` rendering, rewriting any exponent so that
/// it always carries an explicit sign and at least two digits
/// (`1e-7` → `1e-07`, `1e20` → `1e+20`). Renderings without an exponent are
/// returned unchanged.
fn format_pandas_float(v: f64) -> String {
    let rendered = format!("{v:?}");
    let Some(split_at) = rendered.find('e') else {
        return rendered;
    };

    let mantissa = &rendered[..split_at];
    let exponent = &rendered[split_at + 1..];
    let (sign, digits) = if let Some(magnitude) = exponent.strip_prefix('-') {
        ('-', magnitude)
    } else {
        ('+', exponent.strip_prefix('+').unwrap_or(exponent))
    };
    format!("{mantissa}e{sign}{digits:0>2}")
}
3468
3469fn scalar_to_csv(scalar: &Scalar) -> String {
3470 match scalar {
3471 Scalar::Null(_) => String::new(),
3472 Scalar::Bool(v) => if *v { "True" } else { "False" }.to_string(),
3474 Scalar::Int64(v) => v.to_string(),
3475 Scalar::Float64(v) => {
3476 if v.is_nan() {
3477 String::new()
3478 } else {
3479 v.to_string()
3480 }
3481 }
3482 Scalar::Utf8(v) => v.clone(),
3483 Scalar::Timedelta64(v) => {
3484 if *v == Timedelta::NAT {
3485 String::new()
3486 } else {
3487 Timedelta::format(*v)
3488 }
3489 }
3490 Scalar::Datetime64(v) => {
3491 if *v == Timestamp::NAT {
3492 String::new()
3493 } else {
3494 format_datetime_ns(*v)
3495 }
3496 }
3497 Scalar::Period(v) => {
3498 if *v == i64::MIN {
3499 String::new()
3500 } else {
3501 format!("Period[{v}]")
3502 }
3503 }
3504 Scalar::Interval(iv) => format!("{iv}"),
3505 }
3506}
3507
3508#[allow(clippy::too_many_arguments)]
3517fn parse_scalar_with_options(
3518 field: &str,
3519 na_filter: bool,
3520 keep_default_na: bool,
3521 na_set: &HashSet<&str>,
3522 true_set: &HashSet<&str>,
3523 false_set: &HashSet<&str>,
3524 decimal: u8,
3525 thousands: Option<u8>,
3526) -> Scalar {
3527 let trimmed = field.trim();
3531
3532 if na_filter {
3534 let is_default_na = keep_default_na && is_pandas_default_na(field);
3535 let is_custom_na = na_set.contains(field);
3536 if is_default_na || is_custom_na {
3537 return Scalar::Null(NullKind::Null);
3538 }
3539 }
3540
3541 let thousands_effective = thousands.filter(|t| *t != decimal);
3544 let numeric_candidate: Cow<'_, str> = if let Some(t) = thousands_effective {
3545 let ch = char::from(t);
3546 if trimmed.contains(ch) {
3547 Cow::Owned(trimmed.replace(ch, ""))
3548 } else {
3549 Cow::Borrowed(trimmed)
3550 }
3551 } else {
3552 Cow::Borrowed(trimmed)
3553 };
3554
3555 if let Ok(value) = numeric_candidate.as_ref().parse::<i64>() {
3556 return Scalar::Int64(value);
3557 }
3558
3559 let decimal_ch = char::from(decimal);
3560 let float_candidate: Cow<'_, str> = if decimal == b'.' {
3561 Cow::Borrowed(numeric_candidate.as_ref())
3562 } else if numeric_candidate.contains(decimal_ch) {
3563 Cow::Owned(numeric_candidate.replace(decimal_ch, "."))
3564 } else {
3565 Cow::Borrowed(numeric_candidate.as_ref())
3566 };
3567 if let Ok(value) = float_candidate.as_ref().parse::<f64>() {
3568 return Scalar::Float64(value);
3569 }
3570
3571 if true_set.contains(field) {
3572 return Scalar::Bool(true);
3573 }
3574 if false_set.contains(field) {
3575 return Scalar::Bool(false);
3576 }
3577
3578 if field.eq_ignore_ascii_case("true") {
3579 return Scalar::Bool(true);
3580 }
3581 if field.eq_ignore_ascii_case("false") {
3582 return Scalar::Bool(false);
3583 }
3584 Scalar::Utf8(field.to_owned())
3585}
3586
3587fn reject_duplicate_headers(headers: &[String]) -> Result<(), IoError> {
3588 let mut used = BTreeSet::new();
3589 for name in headers {
3590 if !used.insert(name.clone()) {
3591 return Err(IoError::DuplicateColumnName(name.clone()));
3592 }
3593 }
3594 Ok(())
3595}
3596
3597fn validate_usecols(headers: &[String], usecols: &[String]) -> Result<(), IoError> {
3598 let header_set: std::collections::BTreeSet<&String> = headers.iter().collect();
3599 let mut missing = Vec::new();
3600 for name in usecols {
3601 if !header_set.contains(name) {
3602 missing.push(name.clone());
3603 }
3604 }
3605 if missing.is_empty() {
3606 Ok(())
3607 } else {
3608 Err(IoError::MissingUsecols(missing))
3609 }
3610}
3611
3612fn validate_parse_dates(headers: &[String], parse_dates: &[String]) -> Result<(), IoError> {
3613 let header_set: std::collections::BTreeSet<&String> = headers.iter().collect();
3614 let mut missing = Vec::new();
3615 for name in parse_dates {
3616 if !header_set.contains(name) {
3617 missing.push(name.clone());
3618 }
3619 }
3620 if missing.is_empty() {
3621 Ok(())
3622 } else {
3623 Err(IoError::MissingParseDateColumns(missing))
3624 }
3625}
3626
3627fn validate_parse_date_combinations(
3628 headers: &[String],
3629 parse_date_combinations: &[Vec<String>],
3630) -> Result<(), IoError> {
3631 let header_set: std::collections::BTreeSet<&String> = headers.iter().collect();
3632 let mut missing = BTreeSet::new();
3633 for combo in parse_date_combinations {
3634 for name in combo {
3635 if !header_set.contains(name) {
3636 missing.insert(name.clone());
3637 }
3638 }
3639 }
3640 if missing.is_empty() {
3641 Ok(())
3642 } else {
3643 Err(IoError::MissingParseDateColumns(
3644 missing.into_iter().collect(),
3645 ))
3646 }
3647}
3648
3649fn apply_parse_dates(
3650 headers: &[String],
3651 columns: &mut [Vec<Scalar>],
3652 parse_dates: &[String],
3653) -> Result<(), IoError> {
3654 if parse_dates.is_empty() {
3655 return Ok(());
3656 }
3657
3658 validate_parse_dates(headers, parse_dates)?;
3659
3660 for column_name in parse_dates {
3661 let Some(column_idx) = headers.iter().position(|header| header == column_name) else {
3662 continue;
3663 };
3664
3665 let index_labels = (0..columns[column_idx].len() as i64)
3666 .map(IndexLabel::Int64)
3667 .collect::<Vec<_>>();
3668 let series = Series::from_values(
3669 column_name.clone(),
3670 index_labels,
3671 columns[column_idx].clone(),
3672 )?;
3673 if let Some(parsed) = parse_csv_datetime_column(&series)? {
3674 columns[column_idx] = parsed.values().to_vec();
3675 }
3676 }
3677
3678 Ok(())
3679}
3680
/// Parses money-style numeric text into a finite `f64`.
///
/// Surrounding whitespace is trimmed, every comma is dropped, and a `$` is
/// dropped when it appears before any digit (optionally after a single
/// leading sign). Returns `None` for blank input, for text that does not
/// parse, and for values that are not finite.
fn parse_sql_float_text(text: &str) -> Option<f64> {
    let trimmed = text.trim();
    if trimmed.is_empty() {
        return None;
    }

    let mut normalized = String::with_capacity(trimmed.len());
    for ch in trimmed.chars() {
        let at_leading_position = matches!(normalized.as_str(), "" | "+" | "-");
        let is_noise = ch == ',' || (ch == '$' && at_leading_position);
        if !is_noise {
            normalized.push(ch);
        }
    }

    if matches!(normalized.as_str(), "" | "+" | "-" | ".") {
        return None;
    }

    normalized
        .parse::<f64>()
        .ok()
        .filter(|value| value.is_finite())
}
3703
3704fn apply_sql_coerce_float(columns: &mut [Vec<Scalar>]) {
3705 for column in columns {
3706 let mut saw_text_float = false;
3707 let mut parsed_values = Vec::with_capacity(column.len());
3708
3709 for value in column.iter() {
3710 match value {
3711 Scalar::Utf8(text) => {
3712 let Some(parsed) = parse_sql_float_text(text) else {
3713 saw_text_float = false;
3714 parsed_values.clear();
3715 break;
3716 };
3717 saw_text_float = true;
3718 parsed_values.push(Some(parsed));
3719 }
3720 Scalar::Null(_) | Scalar::Int64(_) | Scalar::Float64(_) => {
3721 parsed_values.push(None);
3722 }
3723 Scalar::Bool(_)
3724 | Scalar::Timedelta64(_)
3725 | Scalar::Datetime64(_)
3726 | Scalar::Period(_)
3727 | Scalar::Interval(_) => {
3728 saw_text_float = false;
3729 parsed_values.clear();
3730 break;
3731 }
3732 }
3733 }
3734
3735 if !saw_text_float {
3736 continue;
3737 }
3738
3739 for (value, parsed) in column.iter_mut().zip(parsed_values) {
3740 if let Some(parsed) = parsed {
3741 *value = Scalar::Float64(parsed);
3742 }
3743 }
3744 }
3745}
3746
3747fn combine_parse_date_values(column_group: &[Vec<Scalar>]) -> Vec<Scalar> {
3748 let len = column_group.first().map_or(0, Vec::len);
3749 let mut combined = Vec::with_capacity(len);
3750
3751 for row in 0..len {
3752 if column_group
3753 .iter()
3754 .any(|column| matches!(column[row], Scalar::Null(_)))
3755 {
3756 combined.push(Scalar::Null(NullKind::NaT));
3757 continue;
3758 }
3759
3760 let joined = column_group
3761 .iter()
3762 .map(|column| match &column[row] {
3763 Scalar::Utf8(value) => value.clone(),
3764 other => other.to_string(),
3765 })
3766 .collect::<Vec<_>>()
3767 .join(" ");
3768 combined.push(Scalar::Utf8(joined));
3769 }
3770
3771 combined
3772}
3773
3774fn apply_one_parse_date_combination(
3775 headers: &mut Vec<String>,
3776 columns: &mut Vec<Vec<Scalar>>,
3777 combined_name: String,
3778 sources: &[String],
3779) -> Result<(), IoError> {
3780 let mut positions = sources
3781 .iter()
3782 .map(|name| {
3783 headers
3784 .iter()
3785 .position(|header| header == name)
3786 .ok_or_else(|| IoError::MissingParseDateColumns(vec![name.clone()]))
3787 })
3788 .collect::<Result<Vec<_>, _>>()?;
3789 positions.sort_unstable();
3790
3791 let index_labels = (0..columns[positions[0]].len() as i64)
3792 .map(IndexLabel::Int64)
3793 .collect::<Vec<_>>();
3794 let combined_values = combine_parse_date_values(
3795 &positions
3796 .iter()
3797 .map(|&idx| columns[idx].clone())
3798 .collect::<Vec<_>>(),
3799 );
3800 let combined_series =
3801 Series::from_values(combined_name.clone(), index_labels, combined_values)?;
3802 let parsed = parse_csv_datetime_column(&combined_series)?.unwrap_or(combined_series);
3803
3804 for idx in positions.iter().rev() {
3805 headers.remove(*idx);
3806 columns.remove(*idx);
3807 }
3808 headers.insert(positions[0], combined_name);
3809 columns.insert(positions[0], parsed.values().to_vec());
3810 Ok(())
3811}
3812
3813fn parse_csv_datetime_column(series: &Series) -> Result<Option<Series>, IoError> {
3814 let parsed = to_datetime_with_options(
3824 series,
3825 ToDatetimeOptions {
3826 infer_mixed_timezone: false,
3827 ..ToDatetimeOptions::default()
3828 },
3829 )?;
3830 let parse_failed = series
3831 .values()
3832 .iter()
3833 .zip(parsed.values())
3834 .any(|(original, parsed)| !original.is_missing() && parsed.is_missing());
3835
3836 if parse_failed {
3837 Ok(None)
3838 } else {
3839 Ok(Some(parsed))
3840 }
3841}
3842
3843fn pandas_csv_numeric_column_requires_float(values: &[Scalar]) -> bool {
3844 let mut saw_int = false;
3849 let mut saw_float = false;
3850
3851 for value in values {
3852 match value {
3853 Scalar::Int64(_) => saw_int = true,
3854 Scalar::Float64(_) => saw_float = true,
3855 Scalar::Null(_) => {}
3856 Scalar::Bool(_)
3857 | Scalar::Utf8(_)
3858 | Scalar::Timedelta64(_)
3859 | Scalar::Datetime64(_)
3860 | Scalar::Period(_)
3861 | Scalar::Interval(_) => {
3862 return false;
3863 }
3864 }
3865 }
3866
3867 saw_int && saw_float
3868}
3869
3870fn apply_pandas_csv_numeric_promotions(columns: &mut [Vec<Scalar>]) {
3871 for column in columns {
3872 if !pandas_csv_numeric_column_requires_float(column) {
3873 continue;
3874 }
3875
3876 for value in column {
3877 if let Scalar::Int64(v) = value {
3878 *value = Scalar::Float64(*v as f64);
3879 }
3880 }
3881 }
3882}
3883
3884fn apply_parse_date_combinations(
3885 headers: &mut Vec<String>,
3886 columns: &mut Vec<Vec<Scalar>>,
3887 parse_date_combinations: &[Vec<String>],
3888) -> Result<(), IoError> {
3889 if parse_date_combinations.is_empty() {
3890 return Ok(());
3891 }
3892
3893 validate_parse_date_combinations(headers, parse_date_combinations)?;
3894
3895 for combination in parse_date_combinations {
3896 if combination.is_empty() {
3897 continue;
3898 }
3899 let combined_name = combination.join("_");
3900 apply_one_parse_date_combination(headers, columns, combined_name, combination)?;
3901 }
3902
3903 Ok(())
3904}
3905
3906fn apply_parse_date_combinations_named(
3907 headers: &mut Vec<String>,
3908 columns: &mut Vec<Vec<Scalar>>,
3909 parse_date_combinations_named: &[(String, Vec<String>)],
3910) -> Result<(), IoError> {
3911 if parse_date_combinations_named.is_empty() {
3912 return Ok(());
3913 }
3914
3915 let mut assigned_names: std::collections::HashSet<String> = std::collections::HashSet::new();
3916 for (new_name, _) in parse_date_combinations_named {
3917 if !assigned_names.insert(new_name.clone()) {
3918 return Err(IoError::DuplicateColumnName(new_name.clone()));
3919 }
3920 }
3921
3922 let combos_only: Vec<Vec<String>> = parse_date_combinations_named
3923 .iter()
3924 .map(|(_, sources)| sources.clone())
3925 .collect();
3926 validate_parse_date_combinations(headers, &combos_only)?;
3927
3928 for (new_name, sources) in parse_date_combinations_named {
3929 if sources.is_empty() {
3930 continue;
3931 }
3932 apply_one_parse_date_combination(headers, columns, new_name.clone(), sources)?;
3933 }
3934
3935 Ok(())
3936}
3937
3938fn append_csv_record(
3939 columns: &mut [Vec<Scalar>],
3940 raw_columns: &mut [Vec<String>],
3941 record: &StringRecord,
3942 options: &CsvReadOptions,
3943 na_set: &HashSet<&str>,
3944 true_set: &HashSet<&str>,
3945 false_set: &HashSet<&str>,
3946) {
3947 for (idx, col) in columns.iter_mut().enumerate() {
3948 let field = record.get(idx).unwrap_or_default();
3949 col.push(parse_scalar_with_options(
3950 field,
3951 options.na_filter,
3952 options.keep_default_na,
3953 na_set,
3954 true_set,
3955 false_set,
3956 options.decimal,
3957 options.thousands,
3958 ));
3959 raw_columns[idx].push(field.to_owned());
3962 }
3963}
3964
3965fn should_skip_bad_csv_record(
3966 record: &StringRecord,
3967 expected_fields: usize,
3968 on_bad_lines: CsvOnBadLines,
3969) -> bool {
3970 if record.len() <= expected_fields {
3971 return false;
3972 }
3973
3974 match on_bad_lines {
3975 CsvOnBadLines::Error => false,
3976 CsvOnBadLines::Warn => {
3977 eprintln!(
3978 "Skipping bad CSV line: expected {expected_fields} fields, found {}",
3979 record.len()
3980 );
3981 true
3982 }
3983 CsvOnBadLines::Skip => true,
3984 }
3985}
3986
3987pub fn read_csv_with_options(input: &str, options: &CsvReadOptions) -> Result<DataFrame, IoError> {
3990 if csv_read_options_match_default_fast_path(options) {
3991 return read_csv_str(input);
3992 }
3993
3994 if csv_read_options_match_no_na_numeric_fast_path(options) {
3995 if let Some(frame) = csv_parse_cache_lookup(CsvParseCacheMode::NoNaNumeric, input) {
3996 return Ok(frame);
3997 }
3998
3999 if let Some(frame) = try_read_csv_with_options_no_na_numeric_fast_path(input)? {
4000 csv_parse_cache_store(CsvParseCacheMode::NoNaNumeric, input, &frame);
4001 return Ok(frame);
4002 }
4003 }
4004
4005 let mut builder = ReaderBuilder::new();
4006 builder
4007 .has_headers(false)
4008 .delimiter(options.delimiter)
4009 .quote(options.quotechar)
4010 .double_quote(options.doublequote)
4011 .escape(options.escapechar);
4012 if options.on_bad_lines != CsvOnBadLines::Error {
4013 builder.flexible(true);
4014 }
4015 if let Some(c) = options.comment {
4016 builder.comment(Some(c));
4017 }
4018 if let Some(term) = options.lineterminator {
4019 builder.terminator(csv::Terminator::Any(term));
4020 }
4021 let mut reader = builder.from_reader(input.as_bytes());
4022
4023 let max_rows = options.nrows.unwrap_or(usize::MAX);
4024 let skip = options.skiprows;
4025
4026 let mut records = reader.records();
4027 for _ in 0..skip {
4028 if records.next().transpose()?.is_none() {
4029 return Err(IoError::MissingHeaders);
4030 }
4031 }
4032
4033 let na_set: HashSet<&str> = options.na_values.iter().map(String::as_str).collect();
4037 let true_set: HashSet<&str> = options.true_values.iter().map(String::as_str).collect();
4038 let false_set: HashSet<&str> = options.false_values.iter().map(String::as_str).collect();
4039
4040 let mut row_count: i64 = 0;
4041 let (headers, mut columns, mut raw_columns) = if options.has_headers {
4045 let headers_record = records.next().transpose()?.ok_or(IoError::MissingHeaders)?;
4046 if headers_record.is_empty() {
4047 return Err(IoError::MissingHeaders);
4048 }
4049
4050 let header_count = headers_record.len();
4051 let row_hint = input.len() / (header_count * 8).max(1);
4052 let columns: Vec<Vec<Scalar>> = (0..header_count)
4053 .map(|_| Vec::with_capacity(row_hint))
4054 .collect();
4055 let raw_columns: Vec<Vec<String>> = (0..header_count)
4056 .map(|_| Vec::with_capacity(row_hint))
4057 .collect();
4058
4059 (
4060 headers_record
4061 .iter()
4062 .map(ToOwned::to_owned)
4063 .collect::<Vec<_>>(),
4064 columns,
4065 raw_columns,
4066 )
4067 } else {
4068 let first_record = records.next().transpose()?.ok_or(IoError::MissingHeaders)?;
4069 if first_record.is_empty() {
4070 return Err(IoError::MissingHeaders);
4071 }
4072
4073 let header_count = first_record.len();
4074 let row_hint = input.len() / (header_count * 8).max(1);
4075 let mut columns: Vec<Vec<Scalar>> = (0..header_count)
4076 .map(|_| Vec::with_capacity(row_hint))
4077 .collect();
4078 let mut raw_columns: Vec<Vec<String>> = (0..header_count)
4079 .map(|_| Vec::with_capacity(row_hint))
4080 .collect();
4081
4082 if (row_count as usize) < max_rows {
4083 append_csv_record(
4084 &mut columns,
4085 &mut raw_columns,
4086 &first_record,
4087 options,
4088 &na_set,
4089 &true_set,
4090 &false_set,
4091 );
4092 row_count += 1;
4093 }
4094
4095 (
4096 (0..header_count)
4097 .map(|idx| format!("column_{idx}"))
4098 .collect(),
4099 columns,
4100 raw_columns,
4101 )
4102 };
4103
4104 for row in records {
4105 if (row_count as usize) >= max_rows {
4106 break;
4107 }
4108 let record = row?;
4109 if should_skip_bad_csv_record(&record, columns.len(), options.on_bad_lines) {
4110 continue;
4111 }
4112 append_csv_record(
4113 &mut columns,
4114 &mut raw_columns,
4115 &record,
4116 options,
4117 &na_set,
4118 &true_set,
4119 &false_set,
4120 );
4121 row_count += 1;
4122 }
4123
4124 if options.skipfooter > 0 && (row_count as usize) > 0 {
4127 let drop = options.skipfooter.min(row_count as usize);
4128 for col in columns.iter_mut() {
4129 let new_len = col.len().saturating_sub(drop);
4130 col.truncate(new_len);
4131 }
4132 for col in raw_columns.iter_mut() {
4133 let new_len = col.len().saturating_sub(drop);
4134 col.truncate(new_len);
4135 }
4136 row_count -= drop as i64;
4137 }
4138 reject_duplicate_headers(&headers)?;
4139 if let Some(ref usecols) = options.usecols {
4140 validate_usecols(&headers, usecols)?;
4141 }
4142
4143 let (mut headers, mut columns, raw_columns) = if let Some(ref usecols) = options.usecols {
4145 let mut fh = Vec::new();
4146 let mut fc = Vec::new();
4147 let mut fr = Vec::new();
4148 for ((h, c), r) in headers.into_iter().zip(columns).zip(raw_columns) {
4149 if usecols.contains(&h) {
4150 fh.push(h);
4151 fc.push(c);
4152 fr.push(r);
4153 }
4154 }
4155 (fh, fc, fr)
4156 } else {
4157 (headers, columns, raw_columns)
4158 };
4159
4160 if let Some(ref parse_date_combinations) = options.parse_date_combinations {
4161 apply_parse_date_combinations(&mut headers, &mut columns, parse_date_combinations)?;
4162 }
4163
4164 if let Some(ref named) = options.parse_date_combinations_named {
4165 apply_parse_date_combinations_named(&mut headers, &mut columns, named)?;
4166 }
4167
4168 if let Some(ref parse_dates) = options.parse_dates {
4169 apply_parse_dates(&headers, &mut columns, parse_dates)?;
4170 }
4171
4172 apply_pandas_csv_numeric_promotions(&mut columns);
4173
4174 if let Some(ref dtype_map) = options.dtype {
4176 for (i, name) in headers.iter().enumerate() {
4177 if let Some(&target_dt) = dtype_map.get(name) {
4178 let coerced = columns[i]
4179 .iter()
4180 .map(|v| fp_types::cast_scalar(v, target_dt))
4181 .collect::<Result<Vec<_>, _>>()
4182 .map_err(|err| IoError::Column(ColumnError::from(err)))?;
4183 columns[i] = coerced;
4184 }
4185 }
4186 }
4187
4188 let header_count = headers.len();
4189
4190 let preserve_object_text = options.parse_dates.is_none()
4195 && options.parse_date_combinations.is_none()
4196 && options.parse_date_combinations_named.is_none();
4197 let dtype_forced = |name: &str| -> bool {
4198 options
4199 .dtype
4200 .as_ref()
4201 .is_some_and(|map| map.contains_key(name))
4202 };
4203
4204 if let Some(ref idx_col_name) = options.index_col {
4206 let idx_pos = headers
4207 .iter()
4208 .position(|h| h == idx_col_name)
4209 .ok_or_else(|| IoError::MissingIndexColumn(idx_col_name.clone()))?;
4210
4211 let index_values = columns.remove(idx_pos);
4212 let index_labels: Vec<fp_index::IndexLabel> = index_values
4213 .into_iter()
4214 .map(|s| match s {
4215 Scalar::Int64(v) => fp_index::IndexLabel::Int64(v),
4216 Scalar::Utf8(v) => fp_index::IndexLabel::Utf8(v),
4217 Scalar::Float64(v) => fp_index::IndexLabel::Utf8(v.to_string()),
4218 Scalar::Bool(v) => {
4219 fp_index::IndexLabel::Utf8(if v { "True" } else { "False" }.to_string())
4220 }
4221 Scalar::Null(kind) => fp_index::IndexLabel::Null(kind),
4225 Scalar::Timedelta64(v) => {
4226 if v == Timedelta::NAT {
4227 fp_index::IndexLabel::Utf8("<NaT>".to_owned())
4228 } else {
4229 fp_index::IndexLabel::Utf8(Timedelta::format(v))
4230 }
4231 }
4232 Scalar::Datetime64(v) => {
4233 if v == Timestamp::NAT {
4234 fp_index::IndexLabel::Utf8("<NaT>".to_owned())
4235 } else {
4236 fp_index::IndexLabel::Utf8(format_datetime_ns(v))
4237 }
4238 }
4239 Scalar::Period(v) => {
4240 if v == i64::MIN {
4241 fp_index::IndexLabel::Utf8("<NaT>".to_owned())
4242 } else {
4243 fp_index::IndexLabel::Utf8(format!("Period[{v}]"))
4244 }
4245 }
4246 Scalar::Interval(iv) => fp_index::IndexLabel::Utf8(format!("{iv}")),
4247 })
4248 .collect();
4249 let index = Index::new(index_labels).set_name(idx_col_name);
4252
4253 let mut out_columns = BTreeMap::new();
4254 let mut column_order = Vec::with_capacity(headers.len() - 1);
4255 let mut col_idx = 0;
4256 for (orig_idx, _) in headers.iter().enumerate() {
4257 if orig_idx == idx_pos {
4258 continue;
4259 }
4260 let name = headers.get(orig_idx).cloned().unwrap_or_default();
4261 let column = if preserve_object_text && !dtype_forced(&name) {
4262 build_csv_object_aware_column(columns[col_idx].clone(), &raw_columns[orig_idx])?
4263 } else {
4264 Column::from_values(columns[col_idx].clone())?
4265 };
4266 out_columns.insert(name.clone(), column);
4267 column_order.push(name);
4268 col_idx += 1;
4269 }
4270 Ok(DataFrame::new_with_column_order(
4271 index,
4272 out_columns,
4273 column_order,
4274 )?)
4275 } else {
4276 let mut out_columns = BTreeMap::new();
4277 let mut column_order = Vec::with_capacity(header_count);
4278 for (idx, values) in columns.into_iter().enumerate() {
4279 let name = headers.get(idx).cloned().unwrap_or_default();
4280 let column = if preserve_object_text && !dtype_forced(&name) {
4281 build_csv_object_aware_column(values, &raw_columns[idx])?
4282 } else {
4283 Column::from_values(values)?
4284 };
4285 out_columns.insert(name.clone(), column);
4286 column_order.push(name);
4287 }
4288 let index = csv_default_unit_range_index(row_count);
4289 Ok(DataFrame::new_with_column_order(
4290 index,
4291 out_columns,
4292 column_order,
4293 )?)
4294 }
4295}
4296
4297pub fn read_csv_with_index_cols(
4301 input: &str,
4302 options: &CsvReadOptions,
4303 index_cols: &[&str],
4304) -> Result<DataFrame, IoError> {
4305 let frame = read_csv_with_options(input, options)?;
4306 promote_frame_index_columns(&frame, index_cols)
4307}
4308
4309pub fn read_csv(path: &Path) -> Result<DataFrame, IoError> {
4312 read_csv_with_options_path(path, &CsvReadOptions::default())
4313}
4314
4315pub fn read_csv_with_options_path(
4316 path: &Path,
4317 options: &CsvReadOptions,
4318) -> Result<DataFrame, IoError> {
4319 let content = std::fs::read_to_string(path)?;
4320 read_csv_with_options(&content, options)
4321}
4322
4323pub fn read_csv_with_index_cols_path(
4324 path: &Path,
4325 options: &CsvReadOptions,
4326 index_cols: &[&str],
4327) -> Result<DataFrame, IoError> {
4328 let content = std::fs::read_to_string(path)?;
4329 read_csv_with_index_cols(&content, options, index_cols)
4330}
4331
4332pub fn write_csv(frame: &DataFrame, path: &Path) -> Result<(), IoError> {
4333 let content = write_csv_string(frame)?;
4334 std::fs::write(path, content)?;
4335 Ok(())
4336}
4337
4338pub fn read_table_str(input: &str) -> Result<DataFrame, IoError> {
4347 let opts = CsvReadOptions {
4348 delimiter: b'\t',
4349 ..CsvReadOptions::default()
4350 };
4351 read_csv_with_options(input, &opts)
4352}
4353
4354pub fn read_table_with_options(
4359 input: &str,
4360 options: &CsvReadOptions,
4361) -> Result<DataFrame, IoError> {
4362 let mut effective = options.clone();
4363 if effective.delimiter == b',' {
4364 effective.delimiter = b'\t';
4365 }
4366 read_csv_with_options(input, &effective)
4367}
4368
4369pub fn read_table(path: &Path) -> Result<DataFrame, IoError> {
4371 let opts = CsvReadOptions {
4372 delimiter: b'\t',
4373 ..CsvReadOptions::default()
4374 };
4375 read_csv_with_options_path(path, &opts)
4376}
4377
4378pub fn read_table_with_options_path(
4382 path: &Path,
4383 options: &CsvReadOptions,
4384) -> Result<DataFrame, IoError> {
4385 let mut effective = options.clone();
4386 if effective.delimiter == b',' {
4387 effective.delimiter = b'\t';
4388 }
4389 read_csv_with_options_path(path, &effective)
4390}
4391
4392pub fn read_fwf(path: &Path, options: &FwfReadOptions) -> Result<DataFrame, IoError> {
4400 let content = std::fs::read_to_string(path)?;
4401 read_fwf_str(&content, options)
4402}
4403
4404fn deferred_reader_error(method: &str, reason: &str) -> IoError {
4421 IoError::Deferred(format!(
4422 "{method}: in scope but deferred; {reason}. Use the pandas surface in the meantime."
4423 ))
4424}
4425
4426fn deferred_writer_error(method: &str, reason: &str) -> IoError {
4427 IoError::Deferred(format!(
4428 "{method}: in scope but deferred; {reason}. Use the pandas surface in the meantime."
4429 ))
4430}
4431
4432pub fn read_clipboard() -> Result<DataFrame, IoError> {
4434 Err(deferred_reader_error(
4435 "read_clipboard",
4436 "OS clipboard access requires GUI bindings outside FrankenPandas's headless charter",
4437 ))
4438}
4439
4440pub fn read_gbq(_query: &str, _project_id: Option<&str>) -> Result<DataFrame, IoError> {
4442 Err(deferred_reader_error(
4443 "read_gbq",
4444 "Google BigQuery integration is outside FrankenPandas's local file-format scope",
4445 ))
4446}
4447
4448pub fn read_sas(_path: &Path) -> Result<DataFrame, IoError> {
4450 Err(deferred_reader_error(
4451 "read_sas",
4452 "no first-party Rust SAS sas7bdat/xport reader exists at pandas-parity yet",
4453 ))
4454}
4455
4456pub fn read_spss(_path: &Path) -> Result<DataFrame, IoError> {
4458 Err(deferred_reader_error(
4459 "read_spss",
4460 "no first-party Rust SPSS .sav reader exists at pandas-parity yet",
4461 ))
4462}
4463
4464pub fn write_markdown(frame: &DataFrame, path: &Path) -> Result<(), IoError> {
4468 write_markdown_with_options(frame, path, &MarkdownWriteOptions::default())
4469}
4470
4471pub fn write_markdown_with_options(
4473 frame: &DataFrame,
4474 path: &Path,
4475 options: &MarkdownWriteOptions,
4476) -> Result<(), IoError> {
4477 let content = write_markdown_string_with_options(frame, options)?;
4478 std::fs::write(path, content)?;
4479 Ok(())
4480}
4481
4482pub fn write_latex(frame: &DataFrame, path: &Path) -> Result<(), IoError> {
4484 write_latex_with_options(frame, path, &LatexWriteOptions::default())
4485}
4486
4487pub fn write_latex_with_options(
4489 frame: &DataFrame,
4490 path: &Path,
4491 options: &LatexWriteOptions,
4492) -> Result<(), IoError> {
4493 let content = write_latex_string_with_options(frame, options)?;
4494 std::fs::write(path, content)?;
4495 Ok(())
4496}
4497
4498pub fn read_html(path: &Path) -> Result<DataFrame, IoError> {
4501 read_html_with_options(path, &HtmlReadOptions::default())
4502}
4503
4504pub fn read_html_with_options(
4505 path: &Path,
4506 options: &HtmlReadOptions,
4507) -> Result<DataFrame, IoError> {
4508 let content = std::fs::read_to_string(path)?;
4509 read_html_str_with_options(&content, options)
4510}
4511
4512pub fn write_html(frame: &DataFrame, path: &Path) -> Result<(), IoError> {
4513 write_html_with_options(frame, path, &HtmlWriteOptions::default())
4514}
4515
4516pub fn write_html_with_options(
4517 frame: &DataFrame,
4518 path: &Path,
4519 options: &HtmlWriteOptions,
4520) -> Result<(), IoError> {
4521 let content = write_html_string_with_options(frame, options)?;
4522 std::fs::write(path, content)?;
4523 Ok(())
4524}
4525
4526pub fn write_xml(frame: &DataFrame, path: &Path) -> Result<(), IoError> {
4529 write_xml_with_options(frame, path, &XmlWriteOptions::default())
4530}
4531
4532pub fn write_xml_with_options(
4533 frame: &DataFrame,
4534 path: &Path,
4535 options: &XmlWriteOptions,
4536) -> Result<(), IoError> {
4537 let content = write_xml_string_with_options(frame, options)?;
4538 std::fs::write(path, content)?;
4539 Ok(())
4540}
4541
4542pub fn read_xml(path: &Path) -> Result<DataFrame, IoError> {
4545 read_xml_with_options(path, &XmlReadOptions::default())
4546}
4547
4548pub fn read_xml_with_options(path: &Path, options: &XmlReadOptions) -> Result<DataFrame, IoError> {
4549 let content = std::fs::read_to_string(path)?;
4550 read_xml_str_with_options(&content, options)
4551}
4552
4553fn json_value_to_scalar(val: &serde_json::Value) -> Scalar {
4556 match val {
4557 serde_json::Value::Null => Scalar::Null(NullKind::Null),
4558 serde_json::Value::Bool(b) => Scalar::Bool(*b),
4559 serde_json::Value::Number(n) => {
4560 if let Some(i) = n.as_i64() {
4561 Scalar::Int64(i)
4562 } else if let Some(f) = n.as_f64() {
4563 Scalar::Float64(f)
4564 } else {
4565 Scalar::Utf8(n.to_string())
4566 }
4567 }
4568 serde_json::Value::String(s) => Scalar::Utf8(s.clone()),
4569 other => Scalar::Utf8(other.to_string()),
4570 }
4571}
4572
4573fn parse_json_value_allowing_pandas_nan(input: &str) -> Result<serde_json::Value, IoError> {
4574 match serde_json::from_str(input) {
4575 Ok(value) => Ok(value),
4576 Err(original) => {
4577 let normalized = normalize_bare_json_nan_tokens(input);
4578 if normalized == input {
4579 return Err(original.into());
4580 }
4581 serde_json::from_str(&normalized).map_err(IoError::from)
4582 }
4583 }
4584}
4585
4586fn normalize_bare_json_nan_tokens(input: &str) -> String {
4587 let mut output = String::with_capacity(input.len());
4588 let mut index = 0;
4589 let mut in_string = false;
4590 let mut escaped = false;
4591
4592 while index < input.len() {
4593 let rest = &input[index..];
4594 let Some(ch) = rest.chars().next() else {
4595 break;
4596 };
4597
4598 if in_string {
4599 output.push(ch);
4600 index += ch.len_utf8();
4601 if escaped {
4602 escaped = false;
4603 } else if ch == '\\' {
4604 escaped = true;
4605 } else if ch == '"' {
4606 in_string = false;
4607 }
4608 continue;
4609 }
4610
4611 if ch == '"' {
4612 in_string = true;
4613 output.push(ch);
4614 index += ch.len_utf8();
4615 continue;
4616 }
4617
4618 if rest.starts_with("NaN")
4619 && is_json_value_start_boundary(input, index)
4620 && is_json_value_end_boundary(input, index + 3)
4621 {
4622 output.push_str("null");
4623 index += 3;
4624 continue;
4625 }
4626
4627 output.push(ch);
4628 index += ch.len_utf8();
4629 }
4630
4631 output
4632}
4633
/// Reports whether byte offset `index` is where a JSON value may begin.
///
/// True when the nearest non-whitespace character before `index` is `:`, `[`
/// or `,`, or when only whitespace (or nothing) precedes it.
///
/// # Panics
/// Panics if `index` is past the end of `input` or not on a character boundary.
fn is_json_value_start_boundary(input: &str, index: usize) -> bool {
    match input[..index].trim_end().chars().next_back() {
        None => true,
        Some(previous) => matches!(previous, ':' | '[' | ','),
    }
}
4641
/// Reports whether byte offset `index` is where a JSON value may end.
///
/// True when the first non-whitespace character at or after `index` is `,`,
/// `]` or `}`, or when only whitespace (or nothing) follows.
///
/// # Panics
/// Panics if `index` is past the end of `input` or not on a character boundary.
fn is_json_value_end_boundary(input: &str, index: usize) -> bool {
    match input[index..].trim_start().chars().next() {
        None => true,
        Some(following) => matches!(following, ',' | ']' | '}'),
    }
}
4648
4649fn column_from_json_values(values: Vec<Scalar>) -> Result<Column, IoError> {
4650 let saw_utf8 = values.iter().any(|value| matches!(value, Scalar::Utf8(_)));
4651 let saw_missing = values.iter().any(Scalar::is_missing);
4652 let saw_numeric_like = values.iter().any(|value| {
4653 matches!(
4654 value,
4655 Scalar::Int64(_) | Scalar::Float64(_) | Scalar::Bool(_)
4656 )
4657 });
4658
4659 if !saw_utf8 && saw_missing && (saw_numeric_like || values.iter().all(Scalar::is_missing)) {
4660 let promoted = values
4661 .into_iter()
4662 .map(|value| match value {
4663 Scalar::Int64(value) => Scalar::Float64(value as f64),
4664 Scalar::Bool(value) => Scalar::Float64(if value { 1.0 } else { 0.0 }),
4665 Scalar::Null(_) => Scalar::Null(NullKind::NaN),
4666 other => other,
4667 })
4668 .collect();
4669 return Column::new(DType::Float64, promoted).map_err(IoError::from);
4670 }
4671
4672 Column::from_values(values).map_err(IoError::from)
4673}
4674
4675fn scalar_to_json(scalar: &Scalar) -> serde_json::Value {
4676 match scalar {
4677 Scalar::Null(_) => serde_json::Value::Null,
4678 Scalar::Bool(b) => serde_json::Value::Bool(*b),
4679 Scalar::Int64(i) => serde_json::json!(*i),
4680 Scalar::Float64(f) => {
4681 if f.is_nan() || f.is_infinite() {
4682 serde_json::Value::Null
4683 } else {
4684 serde_json::json!(*f)
4685 }
4686 }
4687 Scalar::Utf8(s) => serde_json::Value::String(s.clone()),
4688 Scalar::Timedelta64(v) => {
4692 if *v == Timedelta::NAT {
4693 serde_json::Value::Null
4694 } else {
4695 serde_json::json!(*v / 1_000_000)
4696 }
4697 }
4698 Scalar::Datetime64(v) => {
4699 if *v == Timestamp::NAT {
4700 serde_json::Value::Null
4701 } else {
4702 serde_json::json!(*v / 1_000_000)
4703 }
4704 }
4705 Scalar::Period(v) => {
4706 if *v == i64::MIN {
4707 serde_json::Value::Null
4708 } else {
4709 serde_json::Value::String(format!("Period[{v}]"))
4710 }
4711 }
4712 Scalar::Interval(iv) => serde_json::Value::String(format!("{iv}")),
4713 }
4714}
4715
4716fn column_promotes_int_json_values_to_float(_values: &[Scalar]) -> bool {
4717 false
4722}
4723
4724fn scalar_to_json_with_column_promotion(
4725 scalar: &Scalar,
4726 promote_int_to_float: bool,
4727) -> serde_json::Value {
4728 if promote_int_to_float && let Scalar::Int64(v) = scalar {
4729 return serde_json::json!(*v as f64);
4730 }
4731 scalar_to_json(scalar)
4732}
4733
4734fn json_value_to_index_label(value: &serde_json::Value) -> IndexLabel {
4735 match value {
4736 serde_json::Value::Number(n) => n
4737 .as_i64()
4738 .map(IndexLabel::Int64)
4739 .unwrap_or_else(|| IndexLabel::Utf8(n.to_string())),
4740 serde_json::Value::String(s) => IndexLabel::Utf8(s.clone()),
4741 serde_json::Value::Bool(b) => IndexLabel::Utf8(b.to_string()),
4742 serde_json::Value::Null => IndexLabel::Utf8("null".to_owned()),
4743 other => IndexLabel::Utf8(other.to_string()),
4744 }
4745}
4746
4747fn json_value_to_column_name(value: &serde_json::Value) -> String {
4748 match value {
4749 serde_json::Value::String(s) => s.clone(),
4750 serde_json::Value::Number(n) => n.to_string(),
4751 serde_json::Value::Bool(b) => b.to_string(),
4752 serde_json::Value::Null => "null".to_owned(),
4753 other => other.to_string(),
4754 }
4755}
4756
4757fn json_key_to_index_label(value: &str) -> IndexLabel {
4758 value
4759 .parse::<i64>()
4760 .map(IndexLabel::Int64)
4761 .unwrap_or_else(|_| IndexLabel::Utf8(value.to_owned()))
4762}
4763
4764fn index_label_to_json(label: &IndexLabel) -> serde_json::Value {
4765 match label {
4766 IndexLabel::Int64(v) => serde_json::json!(*v),
4767 IndexLabel::Utf8(v) => serde_json::Value::String(v.clone()),
4768 IndexLabel::Timedelta64(ns) => serde_json::json!(*ns / 1_000_000),
4773 IndexLabel::Datetime64(ns) => serde_json::json!(*ns / 1_000_000),
4774 IndexLabel::Null(_) => serde_json::Value::Null,
4776 }
4777}
4778
4779fn index_label_json_key(label: &IndexLabel) -> String {
4784 match label {
4785 IndexLabel::Datetime64(ns) | IndexLabel::Timedelta64(ns) => (*ns / 1_000_000).to_string(),
4786 other => other.to_string(),
4787 }
4788}
4789
/// Leading part of the placeholder column names used for row MultiIndex
/// levels: level `n` is named `__index_level_{n}__` (see
/// `synthetic_row_multiindex_names` and
/// `detect_synthetic_row_multiindex_columns`).
const SYNTHETIC_ROW_MULTIINDEX_PREFIX: &str = "__index_level_";
4791
4792fn index_label_to_scalar_value(label: &IndexLabel) -> Scalar {
4793 match label {
4794 IndexLabel::Int64(v) => Scalar::Int64(*v),
4795 IndexLabel::Utf8(v) => Scalar::Utf8(v.clone()),
4796 IndexLabel::Timedelta64(v) => Scalar::Timedelta64(*v),
4797 IndexLabel::Datetime64(v) => Scalar::Utf8(format_datetime_ns(*v)),
4798 IndexLabel::Null(kind) => Scalar::Null(*kind),
4800 }
4801}
4802
4803fn synthetic_row_multiindex_names(nlevels: usize) -> Vec<String> {
4804 (0..nlevels)
4805 .map(|level| format!("{SYNTHETIC_ROW_MULTIINDEX_PREFIX}{level}__"))
4806 .collect()
4807}
4808
4809fn materialize_row_multiindex_columns(
4810 frame: &DataFrame,
4811 names: &[String],
4812) -> Result<DataFrame, IoError> {
4813 let Some(row_multiindex) = frame.row_multiindex() else {
4814 return Ok(frame.clone());
4815 };
4816
4817 let mut columns = BTreeMap::new();
4818 let mut column_order = Vec::with_capacity(names.len() + frame.column_names().len());
4819 for (level, name) in names.iter().enumerate() {
4820 let level_index = row_multiindex.get_level_values(level)?;
4821 let values = level_index
4822 .labels()
4823 .iter()
4824 .map(index_label_to_scalar_value)
4825 .collect::<Vec<_>>();
4826 columns.insert(name.clone(), Column::from_values(values)?);
4827 column_order.push(name.clone());
4828 }
4829
4830 for name in frame.column_names() {
4831 let column = frame
4832 .column(name)
4833 .ok_or_else(|| {
4834 IoError::Frame(FrameError::CompatibilityRejected(format!(
4835 "column not found: '{name}'"
4836 )))
4837 })?
4838 .clone();
4839 columns.insert(name.clone(), column);
4840 column_order.push(name.clone());
4841 }
4842
4843 let index = Index::from_i64((0..frame.len() as i64).collect());
4844 DataFrame::new_with_column_order(index, columns, column_order).map_err(IoError::from)
4845}
4846
4847fn materialize_named_row_multiindex_columns(frame: &DataFrame) -> Result<DataFrame, IoError> {
4848 if frame.row_multiindex().is_some() {
4849 frame.reset_index(false).map_err(IoError::from)
4850 } else {
4851 Ok(frame.clone())
4852 }
4853}
4854
4855fn materialize_synthetic_row_multiindex_columns(frame: &DataFrame) -> Result<DataFrame, IoError> {
4856 let Some(row_multiindex) = frame.row_multiindex() else {
4857 return Ok(frame.clone());
4858 };
4859 let names = synthetic_row_multiindex_names(row_multiindex.nlevels());
4860 materialize_row_multiindex_columns(frame, &names)
4861}
4862
4863fn promote_frame_index_columns(
4864 frame: &DataFrame,
4865 index_cols: &[&str],
4866) -> Result<DataFrame, IoError> {
4867 if index_cols.is_empty() {
4868 return Ok(frame.clone());
4869 }
4870 if index_cols.len() == 1 {
4871 frame.set_index(index_cols[0], true).map_err(IoError::from)
4872 } else {
4873 frame
4874 .set_index_multi(index_cols, true, "|")
4875 .map_err(IoError::from)
4876 }
4877}
4878
4879fn detect_synthetic_row_multiindex_columns(frame: &DataFrame) -> Vec<String> {
4880 let mut out = Vec::new();
4881 for (level, name) in frame.column_names().iter().enumerate() {
4882 let expected = format!("{SYNTHETIC_ROW_MULTIINDEX_PREFIX}{level}__");
4883 if **name == expected {
4884 out.push(expected);
4885 } else {
4886 break;
4887 }
4888 }
4889 out
4890}
4891
4892fn promote_synthetic_row_multiindex_if_present(frame: &DataFrame) -> Result<DataFrame, IoError> {
4893 let synthetic_cols = detect_synthetic_row_multiindex_columns(frame);
4894 if synthetic_cols.len() < 2 {
4895 return Ok(frame.clone());
4896 }
4897 let refs = synthetic_cols
4898 .iter()
4899 .map(String::as_str)
4900 .collect::<Vec<_>>();
4901 promote_frame_index_columns(frame, &refs)
4902}
4903
4904pub fn read_json_str(input: &str, orient: JsonOrient) -> Result<DataFrame, IoError> {
4905 let parsed = parse_json_value_allowing_pandas_nan(input)?;
4906
4907 match orient {
4908 JsonOrient::Records => {
4909 let arr = parsed
4910 .as_array()
4911 .ok_or_else(|| IoError::JsonFormat("expected array for records orient".into()))?;
4912 if arr.is_empty() {
4913 return Ok(DataFrame::new(Index::new(Vec::new()), BTreeMap::new())?);
4914 }
4915
4916 let mut col_names_set = std::collections::BTreeSet::new();
4918 let mut col_names = Vec::new();
4919 for record in arr {
4920 let obj = record
4921 .as_object()
4922 .ok_or_else(|| IoError::JsonFormat("each record must be an object".into()))?;
4923 for key in obj.keys() {
4924 if col_names_set.insert(key.clone()) {
4925 col_names.push(key.clone());
4926 }
4927 }
4928 }
4929
4930 let mut columns: BTreeMap<String, Vec<Scalar>> = BTreeMap::new();
4931 for name in &col_names {
4932 columns.insert(name.clone(), Vec::with_capacity(arr.len()));
4933 }
4934
4935 for record in arr {
4936 let obj = record
4937 .as_object()
4938 .ok_or_else(|| IoError::JsonFormat("each record must be an object".into()))?;
4939 for name in &col_names {
4940 let val = obj.get(name).unwrap_or(&serde_json::Value::Null);
4941 columns
4942 .get_mut(name)
4943 .ok_or_else(|| {
4944 IoError::JsonFormat(format!(
4945 "records orient missing column accumulator for '{name}'"
4946 ))
4947 })?
4948 .push(json_value_to_scalar(val));
4949 }
4950 }
4951
4952 let row_count = arr.len() as i64;
4953 let mut out = BTreeMap::new();
4954 for (name, vals) in columns {
4955 out.insert(name, column_from_json_values(vals)?);
4956 }
4957 let index = Index::from_i64((0..row_count).collect());
4958 let frame = DataFrame::new_with_column_order(index, out, col_names)?;
4959 promote_synthetic_row_multiindex_if_present(&frame)
4960 }
4961 JsonOrient::Columns => {
4962 let obj = parsed
4963 .as_object()
4964 .ok_or_else(|| IoError::JsonFormat("expected object for columns orient".into()))?;
4965
4966 if obj.is_empty() {
4967 return Ok(DataFrame::new(Index::new(Vec::new()), BTreeMap::new())?);
4968 }
4969
4970 let mut raw_columns: BTreeMap<String, Vec<Scalar>> = BTreeMap::new();
4971 let mut column_order = Vec::with_capacity(obj.len());
4972 let mut index_labels = Vec::new();
4973 let mut index_lookup = BTreeMap::new();
4974 for (col_name, col_data) in obj {
4975 let col_obj = col_data.as_object().ok_or_else(|| {
4976 IoError::JsonFormat("column data must be {index: val}".into())
4977 })?;
4978 let mut values = vec![Scalar::Null(NullKind::Null); index_labels.len()];
4979 for (label_key, val) in col_obj {
4980 let label = json_key_to_index_label(label_key);
4981 let row_idx = if let Some(&existing_idx) = index_lookup.get(&label) {
4982 existing_idx
4983 } else {
4984 let next_idx = index_labels.len();
4985 index_labels.push(label.clone());
4986 index_lookup.insert(label, next_idx);
4987 for existing_values in raw_columns.values_mut() {
4988 existing_values.push(Scalar::Null(NullKind::Null));
4989 }
4990 values.push(Scalar::Null(NullKind::Null));
4991 next_idx
4992 };
4993 if row_idx >= values.len() {
4994 values.resize(index_labels.len(), Scalar::Null(NullKind::Null));
4995 }
4996 values[row_idx] = json_value_to_scalar(val);
4997 }
4998 if values.len() < index_labels.len() {
4999 values.resize(index_labels.len(), Scalar::Null(NullKind::Null));
5000 }
5001 raw_columns.insert(col_name.clone(), values);
5002 column_order.push(col_name.clone());
5003 }
5004
5005 let mut out = BTreeMap::new();
5006 for (name, vals) in raw_columns {
5007 out.insert(name, column_from_json_values(vals)?);
5008 }
5009
5010 let frame =
5011 DataFrame::new_with_column_order(Index::new(index_labels), out, column_order)?;
5012 promote_synthetic_row_multiindex_if_present(&frame)
5013 }
5014 JsonOrient::Index => {
5015 let obj = parsed
5016 .as_object()
5017 .ok_or_else(|| IoError::JsonFormat("expected object for index orient".into()))?;
5018
5019 if obj.is_empty() {
5020 return Ok(DataFrame::new(Index::new(Vec::new()), BTreeMap::new())?);
5021 }
5022
5023 let mut index_labels = Vec::with_capacity(obj.len());
5024 let mut columns: BTreeMap<String, Vec<Scalar>> = BTreeMap::new();
5025 let mut column_order = Vec::new();
5026 let mut seen_columns = std::collections::HashSet::new();
5027
5028 for (row_label, row_data) in obj {
5029 let row_obj = row_data.as_object().ok_or_else(|| {
5030 IoError::JsonFormat("index orient rows must be objects".into())
5031 })?;
5032
5033 let row_idx = index_labels.len();
5034
5035 for values in columns.values_mut() {
5037 values.push(Scalar::Null(NullKind::Null));
5038 }
5039
5040 let parsed_label = row_label
5041 .parse::<i64>()
5042 .map(IndexLabel::Int64)
5043 .unwrap_or_else(|_| IndexLabel::Utf8(row_label.clone()));
5044 index_labels.push(parsed_label);
5045
5046 for (col_name, value) in row_obj {
5047 if seen_columns.insert(col_name.clone()) {
5048 column_order.push(col_name.clone());
5049 }
5050 let scalar = json_value_to_scalar(value);
5051 if let Some(values) = columns.get_mut(col_name) {
5052 values[row_idx] = scalar;
5053 } else {
5054 let mut values = vec![Scalar::Null(NullKind::Null); row_idx + 1];
5055 values[row_idx] = scalar;
5056 columns.insert(col_name.clone(), values);
5057 }
5058 }
5059 }
5060
5061 let mut out = BTreeMap::new();
5062 for (name, vals) in columns {
5063 out.insert(name, column_from_json_values(vals)?);
5064 }
5065 let frame =
5066 DataFrame::new_with_column_order(Index::new(index_labels), out, column_order)?;
5067 promote_synthetic_row_multiindex_if_present(&frame)
5068 }
5069 JsonOrient::Split => {
5070 let obj = parsed
5071 .as_object()
5072 .ok_or_else(|| IoError::JsonFormat("expected object for split orient".into()))?;
5073
5074 let col_names: Vec<String> = obj
5075 .get("columns")
5076 .and_then(|v| v.as_array())
5077 .ok_or_else(|| IoError::JsonFormat("split orient needs 'columns' array".into()))?
5078 .iter()
5079 .map(json_value_to_column_name)
5080 .collect();
5081 reject_duplicate_headers(&col_names)?;
5082
5083 let data = obj
5084 .get("data")
5085 .and_then(|v| v.as_array())
5086 .ok_or_else(|| IoError::JsonFormat("split orient needs 'data' array".into()))?;
5087
5088 let explicit_index = obj
5089 .get("index")
5090 .map(|v| {
5091 v.as_array()
5092 .ok_or_else(|| {
5093 IoError::JsonFormat("split orient 'index' must be an array".into())
5094 })
5095 .map(|arr| {
5096 arr.iter()
5097 .map(json_value_to_index_label)
5098 .collect::<Vec<_>>()
5099 })
5100 })
5101 .transpose()?;
5102
5103 let mut columns: BTreeMap<String, Vec<Scalar>> = BTreeMap::new();
5104 for name in &col_names {
5105 columns.insert(name.clone(), Vec::with_capacity(data.len()));
5106 }
5107
5108 for (row_idx, row) in data.iter().enumerate() {
5109 let arr = row
5110 .as_array()
5111 .ok_or_else(|| IoError::JsonFormat("each data row must be an array".into()))?;
5112 if arr.len() != col_names.len() {
5113 return Err(IoError::JsonFormat(format!(
5114 "split orient row {row_idx} length ({}) does not match columns length ({})",
5115 arr.len(),
5116 col_names.len()
5117 )));
5118 }
5119 for (i, name) in col_names.iter().enumerate() {
5120 let val = arr.get(i).unwrap_or(&serde_json::Value::Null);
5121 columns
5122 .get_mut(name)
5123 .ok_or_else(|| {
5124 IoError::JsonFormat(format!(
5125 "split orient missing column accumulator for '{name}'"
5126 ))
5127 })?
5128 .push(json_value_to_scalar(val));
5129 }
5130 }
5131
5132 let row_count = data.len() as i64;
5133 let mut out = BTreeMap::new();
5134 for (name, vals) in columns {
5135 out.insert(name, column_from_json_values(vals)?);
5136 }
5137 let index = match explicit_index {
5138 Some(labels) => {
5139 if labels.len() != row_count as usize {
5140 return Err(IoError::JsonFormat(format!(
5141 "split orient index length ({}) must match data row count ({row_count})",
5142 labels.len()
5143 )));
5144 }
5145 Index::new(labels)
5146 }
5147 None => Index::from_i64((0..row_count).collect()),
5148 };
5149 let frame = DataFrame::new_with_column_order(index, out, col_names)?;
5150 promote_synthetic_row_multiindex_if_present(&frame)
5151 }
5152 JsonOrient::Values => {
5153 let rows = parsed
5154 .as_array()
5155 .ok_or_else(|| IoError::JsonFormat("expected array for values orient".into()))?;
5156
5157 if rows.is_empty() {
5158 return Ok(DataFrame::new(Index::new(Vec::new()), BTreeMap::new())?);
5159 }
5160
5161 let mut width = 0usize;
5162 for row in rows {
5163 let arr = row.as_array().ok_or_else(|| {
5164 IoError::JsonFormat("each values row must be an array".into())
5165 })?;
5166 width = width.max(arr.len());
5167 }
5168
5169 let column_order: Vec<String> = (0..width).map(|idx| idx.to_string()).collect();
5170 let mut columns: BTreeMap<String, Vec<Scalar>> = column_order
5171 .iter()
5172 .cloned()
5173 .map(|name| (name, Vec::with_capacity(rows.len())))
5174 .collect();
5175
5176 for row in rows {
5177 let arr = row.as_array().ok_or_else(|| {
5178 IoError::JsonFormat("each values row must be an array".into())
5179 })?;
5180 for (col_idx, name) in column_order.iter().enumerate() {
5181 let val = arr.get(col_idx).unwrap_or(&serde_json::Value::Null);
5182 columns
5183 .get_mut(name)
5184 .ok_or_else(|| {
5185 IoError::JsonFormat(format!(
5186 "values orient missing column accumulator for '{name}'"
5187 ))
5188 })?
5189 .push(json_value_to_scalar(val));
5190 }
5191 }
5192
5193 let mut out = BTreeMap::new();
5194 for (name, vals) in columns {
5195 out.insert(name, column_from_json_values(vals)?);
5196 }
5197 let index = Index::from_i64((0..rows.len() as i64).collect());
5198 let frame = DataFrame::new_with_column_order(index, out, column_order)?;
5199 promote_synthetic_row_multiindex_if_present(&frame)
5200 }
5201 }
5202}
5203
5204pub fn write_json_string(frame: &DataFrame, orient: JsonOrient) -> Result<String, IoError> {
5205 if frame.row_multiindex().is_some() && orient != JsonOrient::Values {
5206 let materialized = materialize_synthetic_row_multiindex_columns(frame)?;
5207 return write_json_string(&materialized, orient);
5208 }
5209
5210 let headers: Vec<String> = frame.column_names().into_iter().cloned().collect();
5211 let row_count = frame.index().len();
5212 let column_float_promotions = headers
5213 .iter()
5214 .map(|name| {
5215 frame
5216 .column(name)
5217 .is_some_and(|column| column_promotes_int_json_values_to_float(column.values()))
5218 })
5219 .collect::<Vec<_>>();
5220
5221 match orient {
5222 JsonOrient::Records => {
5223 let mut records = Vec::with_capacity(row_count);
5224 for row_idx in 0..row_count {
5225 let mut obj = serde_json::Map::new();
5226 for (name, promote_int_to_float) in
5227 headers.iter().zip(column_float_promotions.iter())
5228 {
5229 let val = frame
5230 .column(name)
5231 .and_then(|c| c.value(row_idx))
5232 .map(|value| {
5233 scalar_to_json_with_column_promotion(value, *promote_int_to_float)
5234 })
5235 .unwrap_or(serde_json::Value::Null);
5236 obj.insert(name.clone(), val);
5237 }
5238 records.push(serde_json::Value::Object(obj));
5239 }
5240 Ok(serde_json::to_string(&records)?)
5241 }
5242 JsonOrient::Columns => {
5243 let mut outer = serde_json::Map::new();
5244 for (name, promote_int_to_float) in headers.iter().zip(column_float_promotions.iter()) {
5245 let mut col_obj = serde_json::Map::new();
5246 if let Some(col) = frame.column(name) {
5247 for (label, val) in frame.index().labels().iter().zip(col.values()) {
5248 let key = index_label_json_key(label);
5249 if col_obj
5250 .insert(
5251 key.clone(),
5252 scalar_to_json_with_column_promotion(val, *promote_int_to_float),
5253 )
5254 .is_some()
5255 {
5256 return Err(IoError::JsonFormat(format!(
5257 "columns orient cannot encode duplicate index label key: {key}"
5258 )));
5259 }
5260 }
5261 }
5262 outer.insert(name.clone(), serde_json::Value::Object(col_obj));
5263 }
5264 Ok(serde_json::to_string(&serde_json::Value::Object(outer))?)
5265 }
5266 JsonOrient::Index => {
5267 let mut outer = serde_json::Map::new();
5268 for row_idx in 0..row_count {
5269 let mut row_obj = serde_json::Map::new();
5270 for (name, promote_int_to_float) in
5271 headers.iter().zip(column_float_promotions.iter())
5272 {
5273 let val = frame
5274 .column(name)
5275 .and_then(|c| c.value(row_idx))
5276 .map(|value| {
5277 scalar_to_json_with_column_promotion(value, *promote_int_to_float)
5278 })
5279 .unwrap_or(serde_json::Value::Null);
5280 row_obj.insert(name.clone(), val);
5281 }
5282
5283 let row_label = index_label_json_key(&frame.index().labels()[row_idx]);
5284 if outer
5285 .insert(row_label.clone(), serde_json::Value::Object(row_obj))
5286 .is_some()
5287 {
5288 return Err(IoError::JsonFormat(format!(
5289 "index orient cannot encode duplicate index label key: {row_label}"
5290 )));
5291 }
5292 }
5293 Ok(serde_json::to_string(&serde_json::Value::Object(outer))?)
5294 }
5295 JsonOrient::Split => {
5296 let col_array: Vec<serde_json::Value> = headers
5297 .iter()
5298 .map(|h| serde_json::Value::String(h.clone()))
5299 .collect();
5300 let index_array: Vec<serde_json::Value> = frame
5301 .index()
5302 .labels()
5303 .iter()
5304 .map(index_label_to_json)
5305 .collect();
5306
5307 let mut data = Vec::with_capacity(row_count);
5308 for row_idx in 0..row_count {
5309 let row: Vec<serde_json::Value> = headers
5310 .iter()
5311 .zip(column_float_promotions.iter())
5312 .map(|(name, promote_int_to_float)| {
5313 frame
5314 .column(name)
5315 .and_then(|c| c.value(row_idx))
5316 .map(|value| {
5317 scalar_to_json_with_column_promotion(value, *promote_int_to_float)
5318 })
5319 .unwrap_or(serde_json::Value::Null)
5320 })
5321 .collect();
5322 data.push(serde_json::Value::Array(row));
5323 }
5324
5325 let mut obj = serde_json::Map::new();
5326 obj.insert("columns".into(), serde_json::Value::Array(col_array));
5327 obj.insert("index".into(), serde_json::Value::Array(index_array));
5328 obj.insert("data".into(), serde_json::Value::Array(data));
5329 Ok(serde_json::to_string(&serde_json::Value::Object(obj))?)
5330 }
5331 JsonOrient::Values => {
5332 let mut data = Vec::with_capacity(row_count);
5333 for row_idx in 0..row_count {
5334 let row: Vec<serde_json::Value> = headers
5335 .iter()
5336 .zip(column_float_promotions.iter())
5337 .map(|(name, promote_int_to_float)| {
5338 frame
5339 .column(name)
5340 .and_then(|c| c.value(row_idx))
5341 .map(|value| {
5342 scalar_to_json_with_column_promotion(value, *promote_int_to_float)
5343 })
5344 .unwrap_or(serde_json::Value::Null)
5345 })
5346 .collect();
5347 data.push(serde_json::Value::Array(row));
5348 }
5349 Ok(serde_json::to_string(&serde_json::Value::Array(data))?)
5350 }
5351 }
5352}
5353
5354pub fn read_json(path: &Path, orient: JsonOrient) -> Result<DataFrame, IoError> {
5357 let content = std::fs::read_to_string(path)?;
5358 read_json_str(&content, orient)
5359}
5360
5361pub fn write_json(frame: &DataFrame, path: &Path, orient: JsonOrient) -> Result<(), IoError> {
5362 let content = write_json_string(frame, orient)?;
5363 std::fs::write(path, content)?;
5364 Ok(())
5365}
5366
5367pub fn read_pickle(path: &Path) -> Result<DataFrame, IoError> {
5371 read_pickle_with_options(path, &PickleReadOptions::default())
5372}
5373
5374pub fn read_pickle_with_options(
5376 path: &Path,
5377 options: &PickleReadOptions,
5378) -> Result<DataFrame, IoError> {
5379 let content = std::fs::read(path)?;
5380 read_pickle_bytes_with_options(&content, options)
5381}
5382
5383pub fn write_pickle(frame: &DataFrame, path: &Path) -> Result<(), IoError> {
5385 write_pickle_with_options(frame, path, &PickleWriteOptions::default())
5386}
5387
5388pub fn write_pickle_with_options(
5390 frame: &DataFrame,
5391 path: &Path,
5392 options: &PickleWriteOptions,
5393) -> Result<(), IoError> {
5394 let content = write_pickle_bytes_with_options(frame, options)?;
5395 std::fs::write(path, content)?;
5396 Ok(())
5397}
5398
5399pub fn read_hdf(path: &Path) -> Result<DataFrame, IoError> {
5403 read_hdf_with_options(path, &HdfReadOptions::default())
5404}
5405
5406pub fn read_hdf_key(path: &Path, key: &str) -> Result<DataFrame, IoError> {
5408 read_hdf_with_options(
5409 path,
5410 &HdfReadOptions {
5411 key: key.to_owned(),
5412 },
5413 )
5414}
5415
/// Reads the frame stored under `options.key` in the HDF5 file at `path`.
///
/// The key is validated with `normalize_hdf5_key`, the payload dataset below
/// that group is read as raw bytes, and the bytes are decoded with
/// `read_pickle_bytes`.
///
/// # Errors
///
/// Returns [`IoError::Hdf5`] when the key is invalid, the file cannot be
/// opened, the payload dataset is missing or unreadable, or the payload does
/// not decode.
#[cfg(feature = "hdf5")]
pub fn read_hdf_with_options(path: &Path, options: &HdfReadOptions) -> Result<DataFrame, IoError> {
    let group_key = normalize_hdf5_key(&options.key)?;
    let payload_path = hdf5_payload_path(&group_key);
    let hdf_file = Hdf5File::open(path).map_err(hdf5_error)?;

    let payload_dataset = match hdf_file.dataset(&payload_path) {
        Ok(found) => found,
        Err(err) => {
            return Err(IoError::Hdf5(format!(
                "missing FrankenPandas payload dataset '{payload_path}': {err}"
            )));
        }
    };

    let bytes = payload_dataset.read_raw::<u8>().map_err(hdf5_error)?;
    match read_pickle_bytes(&bytes) {
        Ok(frame) => Ok(frame),
        Err(err) => Err(IoError::Hdf5(format!(
            "invalid FrankenPandas payload at key '{group_key}': {err}"
        ))),
    }
}
5434
5435#[cfg(not(feature = "hdf5"))]
5437pub fn read_hdf_with_options(
5438 _path: &Path,
5439 _options: &HdfReadOptions,
5440) -> Result<DataFrame, IoError> {
5441 hdf5_feature_disabled()
5442}
5443
5444pub fn write_hdf(frame: &DataFrame, path: &Path) -> Result<(), IoError> {
5446 write_hdf_with_options(frame, path, &HdfWriteOptions::default())
5447}
5448
5449pub fn write_hdf_series(series: &Series, path: &Path) -> Result<(), IoError> {
5453 let frame = series
5454 .to_frame(Some(series.name()))
5455 .map_err(|e| IoError::Hdf5(format!("Series to DataFrame conversion: {e}")))?;
5456 write_hdf(&frame, path)
5457}
5458
5459pub fn write_hdf_series_key(series: &Series, path: &Path, key: &str) -> Result<(), IoError> {
5461 let frame = series
5462 .to_frame(Some(series.name()))
5463 .map_err(|e| IoError::Hdf5(format!("Series to DataFrame conversion: {e}")))?;
5464 write_hdf_key(&frame, path, key)
5465}
5466
5467pub fn write_hdf_key(frame: &DataFrame, path: &Path, key: &str) -> Result<(), IoError> {
5469 write_hdf_with_options(
5470 frame,
5471 path,
5472 &HdfWriteOptions {
5473 key: key.to_owned(),
5474 },
5475 )
5476}
5477
/// Writes `frame` under `options.key` in a newly created HDF5 file at `path`.
///
/// The frame is encoded with `write_pickle_bytes` and stored as the
/// `HDF5_PAYLOAD_DATASET` dataset inside the group named by the normalized key.
///
/// # Errors
///
/// Returns [`IoError::Hdf5`] when the key is invalid or any HDF5 operation
/// fails, and propagates encoding failures.
#[cfg(feature = "hdf5")]
pub fn write_hdf_with_options(
    frame: &DataFrame,
    path: &Path,
    options: &HdfWriteOptions,
) -> Result<(), IoError> {
    // Validate the key and encode the frame before touching the file system.
    let group_key = normalize_hdf5_key(&options.key)?;
    let encoded = write_pickle_bytes(frame)?;

    let hdf_file = Hdf5File::create(path).map_err(hdf5_error)?;
    let target_group = hdf_file.create_group(&group_key).map_err(hdf5_error)?;
    target_group
        .new_dataset_builder()
        .with_data(encoded.as_slice())
        .create(HDF5_PAYLOAD_DATASET)
        .map_err(hdf5_error)?;
    hdf_file.flush().map_err(hdf5_error)
}
5497
5498#[cfg(not(feature = "hdf5"))]
5500pub fn write_hdf_with_options(
5501 _frame: &DataFrame,
5502 _path: &Path,
5503 _options: &HdfWriteOptions,
5504) -> Result<(), IoError> {
5505 hdf5_feature_disabled()
5506}
5507
/// Validates an HDF5 group key and returns it without leading or trailing `/`.
///
/// # Errors
///
/// Returns [`IoError::Hdf5`] when the key is empty after trimming, when any
/// path segment is empty, `.` or `..`, or when a segment equals the reserved
/// `HDF5_PAYLOAD_DATASET` name.
#[cfg(feature = "hdf5")]
fn normalize_hdf5_key(key: &str) -> Result<String, IoError> {
    let trimmed = key.trim_matches('/');
    if trimmed.is_empty() {
        return Err(IoError::Hdf5(
            "hdf5 key must name a non-root group".to_owned(),
        ));
    }

    for segment in trimmed.split('/') {
        match segment {
            "" | "." | ".." => {
                return Err(IoError::Hdf5(format!("invalid hdf5 key '{key}'")));
            }
            reserved if reserved == HDF5_PAYLOAD_DATASET => {
                return Err(IoError::Hdf5(format!(
                    "hdf5 key '{key}' uses reserved FrankenPandas dataset name"
                )));
            }
            _ => {}
        }
    }

    Ok(trimmed.to_owned())
}
5530
/// Returns the dataset path `<key>/<HDF5_PAYLOAD_DATASET>` for a group key.
#[cfg(feature = "hdf5")]
fn hdf5_payload_path(key: &str) -> String {
    let mut dataset_path = String::with_capacity(key.len() + 1 + HDF5_PAYLOAD_DATASET.len());
    dataset_path.push_str(key);
    dataset_path.push('/');
    dataset_path.push_str(HDF5_PAYLOAD_DATASET);
    dataset_path
}
5535
/// Wraps an `hdf5` crate error as [`IoError::Hdf5`], keeping its display text.
#[cfg(feature = "hdf5")]
fn hdf5_error(err: hdf5::Error) -> IoError {
    let message = err.to_string();
    IoError::Hdf5(message)
}
5540
5541#[cfg(not(feature = "hdf5"))]
5542fn hdf5_feature_disabled<T>() -> Result<T, IoError> {
5543 Err(IoError::Hdf5(
5544 "hdf5 support is disabled; enable the fp-io `hdf5` feature".to_owned(),
5545 ))
5546}
5547
5548pub fn read_stata(path: &Path) -> Result<DataFrame, IoError> {
5552 let content = std::fs::read(path)?;
5553 read_stata_bytes(&content)
5554}
5555
5556pub fn write_stata(frame: &DataFrame, path: &Path) -> Result<(), IoError> {
5558 write_stata_with_options(frame, path, &StataWriteOptions::default())
5559}
5560
5561pub fn write_stata_with_options(
5563 frame: &DataFrame,
5564 path: &Path,
5565 options: &StataWriteOptions,
5566) -> Result<(), IoError> {
5567 let content = write_stata_bytes_with_options(frame, options)?;
5568 std::fs::write(path, content)?;
5569 Ok(())
5570}
5571
5572pub fn write_jsonl_string(frame: &DataFrame) -> Result<String, IoError> {
5581 let headers: Vec<String> = frame.column_names().into_iter().cloned().collect();
5582 let row_count = frame.index().len();
5583 let column_float_promotions = headers
5584 .iter()
5585 .map(|name| {
5586 frame
5587 .column(name)
5588 .is_some_and(|column| column_promotes_int_json_values_to_float(column.values()))
5589 })
5590 .collect::<Vec<_>>();
5591
5592 let mut lines = Vec::with_capacity(row_count);
5593 for row_idx in 0..row_count {
5594 let mut obj = serde_json::Map::new();
5595 for (name, promote_int_to_float) in headers.iter().zip(column_float_promotions.iter()) {
5596 let val = frame
5597 .column(name)
5598 .and_then(|c| c.value(row_idx))
5599 .map(|value| scalar_to_json_with_column_promotion(value, *promote_int_to_float))
5600 .unwrap_or(serde_json::Value::Null);
5601 obj.insert(name.clone(), val);
5602 }
5603 lines.push(serde_json::to_string(&serde_json::Value::Object(obj))?);
5604 }
5605
5606 Ok(lines.join("\n"))
5607}
5608
/// Maximum number of non-blank lines `read_jsonl_str` accepts; input with more
/// rows is rejected with an [`IoError::JsonFormat`] error.
const READ_JSONL_MAX_ROWS: usize = 100_000_000;
5617
5618pub fn read_jsonl_str(input: &str) -> Result<DataFrame, IoError> {
5619 let mut all_rows: Vec<serde_json::Map<String, serde_json::Value>> = Vec::new();
5620
5621 for line in input.lines() {
5622 let trimmed = line.trim();
5623 if trimmed.is_empty() {
5624 continue;
5625 }
5626 if all_rows.len() >= READ_JSONL_MAX_ROWS {
5629 return Err(IoError::JsonFormat(format!(
5630 "JSONL input exceeds maximum of {READ_JSONL_MAX_ROWS} rows"
5631 )));
5632 }
5633 let parsed = parse_json_value_allowing_pandas_nan(trimmed)?;
5634 let obj = parsed
5635 .as_object()
5636 .ok_or_else(|| IoError::JsonFormat("JSONL: each line must be a JSON object".into()))?;
5637 all_rows.push(obj.clone());
5638 }
5639
5640 if all_rows.is_empty() {
5641 return DataFrame::new(Index::new(Vec::new()), BTreeMap::new()).map_err(IoError::Frame);
5642 }
5643
5644 let mut col_name_set = std::collections::BTreeSet::new();
5647 let mut col_names_ordered: Vec<String> = Vec::new();
5648 for row in &all_rows {
5649 for key in row.keys() {
5650 if col_name_set.insert(key.clone()) {
5651 col_names_ordered.push(key.clone());
5652 }
5653 }
5654 }
5655 let col_names = col_names_ordered;
5656 let mut columns: Vec<Vec<Scalar>> = col_names
5657 .iter()
5658 .map(|_| Vec::with_capacity(all_rows.len()))
5659 .collect();
5660
5661 for row in &all_rows {
5662 for (col_idx, name) in col_names.iter().enumerate() {
5663 let val = row.get(name).unwrap_or(&serde_json::Value::Null);
5664 columns[col_idx].push(json_value_to_scalar(val));
5665 }
5666 }
5667
5668 let mut out_columns = BTreeMap::new();
5669 let mut column_order = Vec::new();
5670 for (name, values) in col_names.into_iter().zip(columns) {
5671 out_columns.insert(name.clone(), column_from_json_values(values)?);
5672 column_order.push(name);
5673 }
5674
5675 let index = Index::from_i64((0..all_rows.len() as i64).collect());
5676 Ok(DataFrame::new_with_column_order(
5677 index,
5678 out_columns,
5679 column_order,
5680 )?)
5681}
5682
5683pub fn write_jsonl(frame: &DataFrame, path: &Path) -> Result<(), IoError> {
5685 let content = write_jsonl_string(frame)?;
5686 std::fs::write(path, content)?;
5687 Ok(())
5688}
5689
5690pub fn read_jsonl(path: &Path) -> Result<DataFrame, IoError> {
5692 let content = std::fs::read_to_string(path)?;
5693 read_jsonl_str(&content)
5694}
5695
5696fn dtype_to_arrow(dtype: DType) -> ArrowDataType {
5700 match dtype {
5701 DType::Int64 | DType::Int64Nullable => ArrowDataType::Int64,
5702 DType::Float64 => ArrowDataType::Float64,
5703 DType::Utf8 => ArrowDataType::Utf8,
5704 DType::Categorical => ArrowDataType::Utf8,
5705 DType::Bool | DType::BoolNullable => ArrowDataType::Boolean,
5706 DType::Null => ArrowDataType::Utf8, DType::Timedelta64 => ArrowDataType::Int64, DType::Datetime64 => ArrowDataType::Int64, DType::Period => ArrowDataType::Int64, DType::Interval => ArrowDataType::Utf8, DType::Sparse => ArrowDataType::Utf8, }
5713}
5714
5715fn column_to_arrow_array(column: &Column) -> Result<Arc<dyn Array>, IoError> {
5716 let arr: Arc<dyn Array> = match column.dtype() {
5717 DType::Int64 | DType::Int64Nullable => {
5718 let mut builder = Int64Builder::with_capacity(column.len());
5719 for value in column.values() {
5720 match value {
5721 Scalar::Int64(n) => builder.append_value(*n),
5722 _ if value.is_missing() => builder.append_null(),
5723 _ => builder.append_null(),
5724 }
5725 }
5726 Arc::new(builder.finish())
5727 }
5728 DType::Float64 => {
5729 let mut builder = Float64Builder::with_capacity(column.len());
5730 for value in column.values() {
5731 match value {
5732 Scalar::Float64(n) => {
5733 if n.is_nan() {
5734 builder.append_null();
5735 } else {
5736 builder.append_value(*n);
5737 }
5738 }
5739 _ if value.is_missing() => builder.append_null(),
5740 _ => builder.append_null(),
5741 }
5742 }
5743 Arc::new(builder.finish())
5744 }
5745 DType::Bool | DType::BoolNullable => {
5746 let mut builder = BooleanBuilder::with_capacity(column.len());
5747 for value in column.values() {
5748 match value {
5749 Scalar::Bool(flag) => builder.append_value(*flag),
5750 _ if value.is_missing() => builder.append_null(),
5751 _ => builder.append_null(),
5752 }
5753 }
5754 Arc::new(builder.finish())
5755 }
5756 DType::Utf8 | DType::Categorical | DType::Null | DType::Sparse => {
5757 let mut builder = StringBuilder::with_capacity(column.len(), column.len() * 8);
5758 for value in column.values() {
5759 match value {
5760 Scalar::Utf8(text) => builder.append_value(text),
5761 _ if value.is_missing() => builder.append_null(),
5762 _ => builder.append_value(format!("{value:?}")),
5763 }
5764 }
5765 Arc::new(builder.finish())
5766 }
5767 DType::Timedelta64 => {
5768 let mut builder = Int64Builder::with_capacity(column.len());
5769 for value in column.values() {
5770 match value {
5771 Scalar::Timedelta64(nanos) => {
5772 if *nanos == Timedelta::NAT {
5773 builder.append_null();
5774 } else {
5775 builder.append_value(*nanos);
5776 }
5777 }
5778 _ if value.is_missing() => builder.append_null(),
5779 _ => builder.append_null(),
5780 }
5781 }
5782 Arc::new(builder.finish())
5783 }
5784 DType::Datetime64 => {
5785 let mut builder = Int64Builder::with_capacity(column.len());
5786 for value in column.values() {
5787 match value {
5788 Scalar::Datetime64(nanos) => {
5789 if *nanos == Timestamp::NAT {
5790 builder.append_null();
5791 } else {
5792 builder.append_value(*nanos);
5793 }
5794 }
5795 _ if value.is_missing() => builder.append_null(),
5796 _ => builder.append_null(),
5797 }
5798 }
5799 Arc::new(builder.finish())
5800 }
5801 DType::Period => {
5802 let mut builder = Int64Builder::with_capacity(column.len());
5803 for value in column.values() {
5804 match value {
5805 Scalar::Period(ordinal) => {
5806 if *ordinal == i64::MIN {
5807 builder.append_null();
5808 } else {
5809 builder.append_value(*ordinal);
5810 }
5811 }
5812 _ if value.is_missing() => builder.append_null(),
5813 _ => builder.append_null(),
5814 }
5815 }
5816 Arc::new(builder.finish())
5817 }
5818 DType::Interval => {
5819 let mut builder = StringBuilder::with_capacity(column.len(), column.len() * 32);
5820 for value in column.values() {
5821 match value {
5822 Scalar::Interval(iv) => builder.append_value(format!("{iv}")),
5823 _ if value.is_missing() => builder.append_null(),
5824 _ => builder.append_null(),
5825 }
5826 }
5827 Arc::new(builder.finish())
5828 }
5829 };
5830
5831 Ok(arr)
5832}
5833
5834pub fn series_to_arrow_array(series: &Series) -> Result<(ArrowDataType, Arc<dyn Array>), IoError> {
5840 let dt = dtype_to_arrow(series.column().dtype());
5841 Ok((dt, column_to_arrow_array(series.column())?))
5842}
5843
5844pub fn series_from_arrow_array(
5846 name: impl Into<String>,
5847 index_labels: Vec<IndexLabel>,
5848 arr: &dyn Array,
5849 dt: &ArrowDataType,
5850) -> Result<Series, IoError> {
5851 let values = arrow_array_to_scalars(arr, dt)?;
5852 Series::from_values(name, index_labels, values).map_err(IoError::from)
5853}
5854
5855fn dataframe_to_record_batch(frame: &DataFrame) -> Result<RecordBatch, IoError> {
5857 let materialized = if frame.row_multiindex().is_some() {
5858 Some(materialize_synthetic_row_multiindex_columns(frame)?)
5859 } else {
5860 None
5861 };
5862 let frame = materialized.as_ref().unwrap_or(frame);
5863
5864 let col_names: Vec<String> = frame.column_names().into_iter().cloned().collect();
5865 let mut fields = Vec::with_capacity(col_names.len());
5866 let mut arrays: Vec<Arc<dyn Array>> = Vec::with_capacity(col_names.len());
5867
5868 for name in &col_names {
5869 let col = frame
5870 .column(name)
5871 .ok_or_else(|| IoError::Parquet(format!("missing column: {name}")))?;
5872 let dt = col.dtype();
5873 fields.push(Field::new(name.as_str(), dtype_to_arrow(dt), true));
5874 let arr = column_to_arrow_array(col)?;
5875 arrays.push(arr);
5876 }
5877
5878 let schema = Arc::new(Schema::new(fields));
5879 RecordBatch::try_new(schema, arrays).map_err(|e| IoError::Parquet(e.to_string()))
5880}
5881
5882fn record_batch_to_dataframe(batch: &RecordBatch) -> Result<DataFrame, IoError> {
5884 let n_rows = batch.num_rows();
5885 let schema = batch.schema();
5886 let mut columns = BTreeMap::new();
5887 let mut col_order = Vec::new();
5888
5889 for (i, field) in schema.fields().iter().enumerate() {
5890 let name = field.name().clone();
5891 let arr = batch.column(i);
5892 let values = arrow_array_to_scalars(arr.as_ref(), field.data_type())?;
5893 let dtype = fp_dtype_for_arrow_data_type(field.data_type());
5894 let col = Column::new(dtype, values)?;
5895 columns.insert(name.clone(), col);
5896 col_order.push(name);
5897 }
5898
5899 let labels: Vec<IndexLabel> = (0..n_rows).map(|i| IndexLabel::Int64(i as i64)).collect();
5900 let index = Index::new(labels);
5901
5902 let frame = DataFrame::new_with_column_order(index, columns, col_order)?;
5903 promote_synthetic_row_multiindex_if_present(&frame)
5904}
5905
5906fn fp_dtype_for_arrow_data_type(dt: &ArrowDataType) -> DType {
5907 match dt {
5908 ArrowDataType::Int8
5909 | ArrowDataType::Int16
5910 | ArrowDataType::Int32
5911 | ArrowDataType::Int64
5912 | ArrowDataType::UInt8
5913 | ArrowDataType::UInt16
5914 | ArrowDataType::UInt32
5915 | ArrowDataType::UInt64 => DType::Int64,
5916 ArrowDataType::Float16 | ArrowDataType::Float32 | ArrowDataType::Float64 => DType::Float64,
5917 ArrowDataType::Boolean => DType::Bool,
5918 ArrowDataType::Utf8
5919 | ArrowDataType::LargeUtf8
5920 | ArrowDataType::Date32
5921 | ArrowDataType::Date64
5922 | ArrowDataType::Timestamp(_, _) => DType::Utf8,
5923 _ => DType::Utf8,
5924 }
5925}
5926
5927fn arrow_array_to_scalars(arr: &dyn Array, dt: &ArrowDataType) -> Result<Vec<Scalar>, IoError> {
5929 let len = arr.len();
5930 let mut scalars = Vec::with_capacity(len);
5931
5932 match dt {
5933 ArrowDataType::Int64 => {
5934 let typed = arr
5935 .as_any()
5936 .downcast_ref::<Int64Array>()
5937 .ok_or_else(|| IoError::Parquet("expected Int64Array".into()))?;
5938 for i in 0..len {
5939 if typed.is_null(i) {
5940 scalars.push(Scalar::Null(NullKind::Null));
5941 } else {
5942 scalars.push(Scalar::Int64(typed.value(i)));
5943 }
5944 }
5945 }
5946 ArrowDataType::Int32 => {
5947 let typed = arr
5948 .as_any()
5949 .downcast_ref::<arrow::array::Int32Array>()
5950 .ok_or_else(|| IoError::Parquet("expected Int32Array".into()))?;
5951 for i in 0..len {
5952 if typed.is_null(i) {
5953 scalars.push(Scalar::Null(NullKind::Null));
5954 } else {
5955 scalars.push(Scalar::Int64(i64::from(typed.value(i))));
5956 }
5957 }
5958 }
5959 ArrowDataType::Float64 => {
5960 let typed = arr
5961 .as_any()
5962 .downcast_ref::<Float64Array>()
5963 .ok_or_else(|| IoError::Parquet("expected Float64Array".into()))?;
5964 for i in 0..len {
5965 if typed.is_null(i) {
5966 scalars.push(Scalar::Null(NullKind::NaN));
5967 } else {
5968 scalars.push(Scalar::Float64(typed.value(i)));
5969 }
5970 }
5971 }
5972 ArrowDataType::Float32 => {
5973 let typed = arr
5974 .as_any()
5975 .downcast_ref::<arrow::array::Float32Array>()
5976 .ok_or_else(|| IoError::Parquet("expected Float32Array".into()))?;
5977 for i in 0..len {
5978 if typed.is_null(i) {
5979 scalars.push(Scalar::Null(NullKind::NaN));
5980 } else {
5981 scalars.push(Scalar::Float64(f64::from(typed.value(i))));
5982 }
5983 }
5984 }
5985 ArrowDataType::Boolean => {
5986 let typed = arr
5987 .as_any()
5988 .downcast_ref::<BooleanArray>()
5989 .ok_or_else(|| IoError::Parquet("expected BooleanArray".into()))?;
5990 for i in 0..len {
5991 if typed.is_null(i) {
5992 scalars.push(Scalar::Null(NullKind::Null));
5993 } else {
5994 scalars.push(Scalar::Bool(typed.value(i)));
5995 }
5996 }
5997 }
5998 ArrowDataType::Utf8 => {
5999 let typed = arr
6000 .as_any()
6001 .downcast_ref::<StringArray>()
6002 .ok_or_else(|| IoError::Parquet("expected StringArray".into()))?;
6003 for i in 0..len {
6004 if typed.is_null(i) {
6005 scalars.push(Scalar::Null(NullKind::Null));
6006 } else {
6007 scalars.push(Scalar::Utf8(typed.value(i).to_owned()));
6008 }
6009 }
6010 }
6011 ArrowDataType::LargeUtf8 => {
6012 let typed = arr
6013 .as_any()
6014 .downcast_ref::<arrow::array::LargeStringArray>()
6015 .ok_or_else(|| IoError::Parquet("expected LargeStringArray".into()))?;
6016 for i in 0..len {
6017 if typed.is_null(i) {
6018 scalars.push(Scalar::Null(NullKind::Null));
6019 } else {
6020 scalars.push(Scalar::Utf8(typed.value(i).to_owned()));
6021 }
6022 }
6023 }
6024 ArrowDataType::Date32 => {
6025 let typed = arr
6026 .as_any()
6027 .downcast_ref::<Date32Array>()
6028 .ok_or_else(|| IoError::Parquet("expected Date32Array".into()))?;
6029 for i in 0..len {
6030 if typed.is_null(i) {
6031 scalars.push(Scalar::Null(NullKind::NaT));
6032 } else {
6033 if let Some(date) = arrow::temporal_conversions::as_date::<
6034 arrow::datatypes::Date32Type,
6035 >(typed.value(i).into())
6036 {
6037 scalars.push(Scalar::Utf8(date.format("%Y-%m-%d").to_string()));
6038 } else {
6039 scalars.push(Scalar::Null(NullKind::NaT));
6040 }
6041 }
6042 }
6043 }
6044 ArrowDataType::Date64 => {
6045 let typed = arr
6046 .as_any()
6047 .downcast_ref::<Date64Array>()
6048 .ok_or_else(|| IoError::Parquet("expected Date64Array".into()))?;
6049 for i in 0..len {
6050 if typed.is_null(i) {
6051 scalars.push(Scalar::Null(NullKind::NaT));
6052 } else {
6053 if let Some(dt) = arrow::temporal_conversions::as_datetime::<
6054 arrow::datatypes::Date64Type,
6055 >(typed.value(i))
6056 {
6057 scalars.push(Scalar::Utf8(dt.format("%Y-%m-%d").to_string()));
6058 } else {
6059 scalars.push(Scalar::Null(NullKind::NaT));
6060 }
6061 }
6062 }
6063 }
6064 ArrowDataType::Timestamp(unit, _tz) => match unit {
6065 TimeUnit::Second => {
6066 let typed = arr
6067 .as_any()
6068 .downcast_ref::<TimestampSecondArray>()
6069 .ok_or_else(|| IoError::Parquet("expected TimestampSecondArray".into()))?;
6070 for i in 0..len {
6071 if typed.is_null(i) {
6072 scalars.push(Scalar::Null(NullKind::NaT));
6073 } else {
6074 if let Some(dt) = arrow::temporal_conversions::as_datetime::<
6075 arrow::datatypes::TimestampSecondType,
6076 >(typed.value(i))
6077 {
6078 scalars.push(Scalar::Utf8(dt.format("%Y-%m-%d %H:%M:%S").to_string()));
6079 } else {
6080 scalars.push(Scalar::Null(NullKind::NaT));
6081 }
6082 }
6083 }
6084 }
6085 TimeUnit::Millisecond => {
6086 let typed = arr
6087 .as_any()
6088 .downcast_ref::<TimestampMillisecondArray>()
6089 .ok_or_else(|| IoError::Parquet("expected TimestampMillisecondArray".into()))?;
6090 for i in 0..len {
6091 if typed.is_null(i) {
6092 scalars.push(Scalar::Null(NullKind::NaT));
6093 } else {
6094 if let Some(dt) = arrow::temporal_conversions::as_datetime::<
6095 arrow::datatypes::TimestampMillisecondType,
6096 >(typed.value(i))
6097 {
6098 scalars.push(Scalar::Utf8(dt.format("%Y-%m-%d %H:%M:%S").to_string()));
6099 } else {
6100 scalars.push(Scalar::Null(NullKind::NaT));
6101 }
6102 }
6103 }
6104 }
6105 TimeUnit::Microsecond => {
6106 let typed = arr
6107 .as_any()
6108 .downcast_ref::<TimestampMicrosecondArray>()
6109 .ok_or_else(|| IoError::Parquet("expected TimestampMicrosecondArray".into()))?;
6110 for i in 0..len {
6111 if typed.is_null(i) {
6112 scalars.push(Scalar::Null(NullKind::NaT));
6113 } else {
6114 if let Some(dt) = arrow::temporal_conversions::as_datetime::<
6115 arrow::datatypes::TimestampMicrosecondType,
6116 >(typed.value(i))
6117 {
6118 scalars
6119 .push(Scalar::Utf8(dt.format("%Y-%m-%d %H:%M:%S%.6f").to_string()));
6120 } else {
6121 scalars.push(Scalar::Null(NullKind::NaT));
6122 }
6123 }
6124 }
6125 }
6126 TimeUnit::Nanosecond => {
6127 let typed = arr
6128 .as_any()
6129 .downcast_ref::<TimestampNanosecondArray>()
6130 .ok_or_else(|| IoError::Parquet("expected TimestampNanosecondArray".into()))?;
6131 for i in 0..len {
6132 if typed.is_null(i) {
6133 scalars.push(Scalar::Null(NullKind::NaT));
6134 } else {
6135 if let Some(dt) = arrow::temporal_conversions::as_datetime::<
6136 arrow::datatypes::TimestampNanosecondType,
6137 >(typed.value(i))
6138 {
6139 scalars
6140 .push(Scalar::Utf8(dt.format("%Y-%m-%d %H:%M:%S%.9f").to_string()));
6141 } else {
6142 scalars.push(Scalar::Null(NullKind::NaT));
6143 }
6144 }
6145 }
6146 }
6147 },
6148 other => {
6149 return Err(IoError::Parquet(format!(
6150 "unsupported Arrow data type: {other:?}"
6151 )));
6152 }
6153 }
6154
6155 Ok(scalars)
6156}
6157
6158pub fn write_parquet_bytes(frame: &DataFrame) -> Result<Vec<u8>, IoError> {
6160 let batch = dataframe_to_record_batch(frame)?;
6161 let mut buf = Vec::new();
6162 let mut writer = ArrowWriter::try_new(&mut buf, batch.schema(), None)
6163 .map_err(|e| IoError::Parquet(e.to_string()))?;
6164 writer
6165 .write(&batch)
6166 .map_err(|e| IoError::Parquet(e.to_string()))?;
6167 writer
6168 .close()
6169 .map_err(|e| IoError::Parquet(e.to_string()))?;
6170 Ok(buf)
6171}
6172
6173pub fn read_parquet_bytes(data: &[u8]) -> Result<DataFrame, IoError> {
6175 let b = bytes::Bytes::from(data.to_vec());
6176 let reader = ParquetRecordBatchReaderBuilder::try_new(b)
6177 .map_err(|e| IoError::Parquet(e.to_string()))?
6178 .build()
6179 .map_err(|e| IoError::Parquet(e.to_string()))?;
6180
6181 let mut all_frames: Vec<DataFrame> = Vec::new();
6182 for batch_result in reader {
6183 let batch: RecordBatch =
6184 batch_result.map_err(|e: arrow::error::ArrowError| IoError::Parquet(e.to_string()))?;
6185 all_frames.push(record_batch_to_dataframe(&batch)?);
6186 }
6187
6188 if all_frames.is_empty() {
6189 return Ok(DataFrame::new_with_column_order(
6191 Index::new(vec![]),
6192 BTreeMap::new(),
6193 vec![],
6194 )?);
6195 }
6196
6197 if all_frames.len() == 1 {
6199 if let Some(frame) = all_frames.into_iter().next() {
6200 return Ok(frame);
6201 }
6202 return Err(IoError::Parquet(
6203 "parquet reader produced zero record batches".to_owned(),
6204 ));
6205 }
6206
6207 let refs: Vec<&DataFrame> = all_frames.iter().collect();
6209 fp_frame::concat_dataframes(&refs).map_err(IoError::from)
6210}
6211
6212pub fn write_parquet(frame: &DataFrame, path: &Path) -> Result<(), IoError> {
6214 let bytes = write_parquet_bytes(frame)?;
6215 std::fs::write(path, bytes)?;
6216 Ok(())
6217}
6218
6219pub fn read_parquet(path: &Path) -> Result<DataFrame, IoError> {
6221 let data = std::fs::read(path)?;
6222 read_parquet_bytes(&data)
6223}
6224
6225pub fn write_orc_bytes(frame: &DataFrame) -> Result<Vec<u8>, IoError> {
6232 let batch = dataframe_to_record_batch(frame)?;
6233 let mut buf = Vec::new();
6234 let mut writer = OrcArrowWriterBuilder::new(&mut buf, batch.schema())
6235 .try_build()
6236 .map_err(|err| IoError::Orc(err.to_string()))?;
6237 writer
6238 .write(&batch)
6239 .map_err(|err| IoError::Orc(err.to_string()))?;
6240 writer
6241 .close()
6242 .map_err(|err| IoError::Orc(err.to_string()))?;
6243 Ok(buf)
6244}
6245
6246pub fn read_orc_bytes(data: &[u8]) -> Result<DataFrame, IoError> {
6248 let bytes = bytes::Bytes::from(data.to_vec());
6249 let reader = OrcArrowReaderBuilder::try_new(bytes)
6250 .map_err(|err| IoError::Orc(err.to_string()))?
6251 .build();
6252
6253 let mut all_frames: Vec<DataFrame> = Vec::new();
6254 for batch_result in reader {
6255 let batch = batch_result.map_err(|err| IoError::Orc(err.to_string()))?;
6256 all_frames.push(record_batch_to_dataframe(&batch)?);
6257 }
6258
6259 if all_frames.is_empty() {
6260 return Ok(DataFrame::new_with_column_order(
6261 Index::new(vec![]),
6262 BTreeMap::new(),
6263 vec![],
6264 )?);
6265 }
6266
6267 if all_frames.len() == 1 {
6268 if let Some(frame) = all_frames.into_iter().next() {
6269 return Ok(frame);
6270 }
6271 return Err(IoError::Orc(
6272 "orc reader produced zero record batches".to_owned(),
6273 ));
6274 }
6275
6276 let refs: Vec<&DataFrame> = all_frames.iter().collect();
6277 fp_frame::concat_dataframes(&refs).map_err(IoError::from)
6278}
6279
6280pub fn write_orc(frame: &DataFrame, path: &Path) -> Result<(), IoError> {
6282 let bytes = write_orc_bytes(frame)?;
6283 std::fs::write(path, bytes)?;
6284 Ok(())
6285}
6286
6287pub fn read_orc(path: &Path) -> Result<DataFrame, IoError> {
6289 let data = std::fs::read(path)?;
6290 read_orc_bytes(&data)
6291}
6292
/// Options controlling how a worksheet is turned into a frame.
#[derive(Debug, Clone)]
pub struct ExcelReadOptions {
    /// Sheet to read; `None` selects the workbook's first sheet.
    pub sheet_name: Option<String>,
    /// Whether the first (non-skipped) row holds the column headers.
    pub has_headers: bool,
    /// Column names to keep; `None` keeps every column.
    pub usecols: Option<Vec<String>>,
    /// Replacement column names; the count must match the sheet width.
    pub names: Option<Vec<String>>,
    /// Name of the column to use as the row index.
    pub index_col: Option<String>,
    /// Number of leading rows dropped before parsing.
    pub skip_rows: usize,
}

impl Default for ExcelReadOptions {
    /// Reads the first sheet with a header row, keeping every column and
    /// skipping no rows.
    fn default() -> Self {
        ExcelReadOptions {
            has_headers: true,
            skip_rows: 0,
            sheet_name: None,
            usecols: None,
            names: None,
            index_col: None,
        }
    }
}
6326
6327fn excel_cell_to_scalar(cell: &calamine::Data) -> Scalar {
6329 match cell {
6330 calamine::Data::Int(v) => Scalar::Int64(*v),
6331 calamine::Data::Float(v) => {
6332 if v.is_nan() {
6333 Scalar::Null(NullKind::NaN)
6334 } else if v.fract() == 0.0 && *v >= i64::MIN as f64 && *v <= i64::MAX as f64 {
6335 Scalar::Int64(*v as i64)
6337 } else {
6338 Scalar::Float64(*v)
6339 }
6340 }
6341 calamine::Data::String(s) => {
6342 if s.is_empty() {
6343 Scalar::Null(NullKind::Null)
6344 } else {
6345 Scalar::Utf8(s.clone())
6346 }
6347 }
6348 calamine::Data::Bool(b) => Scalar::Bool(*b),
6349 calamine::Data::Empty => Scalar::Null(NullKind::Null),
6350 calamine::Data::DateTime(dt) => {
6351 Scalar::Utf8(format!("{dt}"))
6353 }
6354 calamine::Data::DateTimeIso(s) => Scalar::Utf8(s.clone()),
6355 calamine::Data::DurationIso(s) => Scalar::Utf8(s.clone()),
6356 calamine::Data::Error(e) => Scalar::Utf8(format!("#ERROR:{e:?}")),
6357 }
6358}
6359
6360fn scalar_to_index_label(scalar: Scalar) -> IndexLabel {
6362 match scalar {
6363 Scalar::Int64(v) => IndexLabel::Int64(v),
6364 Scalar::Utf8(s) => IndexLabel::Utf8(s),
6365 Scalar::Float64(v) if v.fract() == 0.0 && v >= i64::MIN as f64 && v <= i64::MAX as f64 => {
6366 IndexLabel::Int64(v as i64)
6367 }
6368 Scalar::Float64(v) => IndexLabel::Utf8(v.to_string()),
6369 Scalar::Bool(b) => IndexLabel::Utf8(if b { "True" } else { "False" }.to_string()),
6370 _ => IndexLabel::Utf8(String::new()),
6371 }
6372}
6373
6374fn infer_writer_emitted_default_excel_index_col(
6375 headers: &[String],
6376 header_generated: &[bool],
6377 columns: &[Vec<Scalar>],
6378 options: &ExcelReadOptions,
6379) -> Option<usize> {
6380 if !options.has_headers
6381 || options.index_col.is_some()
6382 || options.usecols.is_some()
6383 || options.names.is_some()
6384 {
6385 return None;
6386 }
6387
6388 if headers.first()?.as_str() != "column_0"
6389 || !header_generated.first().copied().unwrap_or(false)
6390 {
6391 return None;
6392 }
6393
6394 let first_col = columns.first()?;
6395 if first_col
6396 .iter()
6397 .enumerate()
6398 .all(|(idx, scalar)| matches!(scalar, Scalar::Int64(value) if *value == idx as i64))
6399 {
6400 Some(0)
6401 } else {
6402 None
6403 }
6404}
6405
6406fn parse_excel_rows(
6408 rows: Vec<Vec<calamine::Data>>,
6409 options: &ExcelReadOptions,
6410) -> Result<DataFrame, IoError> {
6411 if rows.is_empty() {
6412 return DataFrame::new(Index::new(Vec::new()), BTreeMap::new()).map_err(IoError::Frame);
6413 }
6414
6415 let resolve_names = |width: usize| -> Result<Option<Vec<String>>, IoError> {
6416 options.names.as_ref().map_or(Ok(None), |names| {
6417 if names.len() == width {
6418 Ok(Some(names.clone()))
6419 } else {
6420 Err(IoError::Excel(format!(
6421 "expected {width} column names, got {}",
6422 names.len()
6423 )))
6424 }
6425 })
6426 };
6427
6428 let (headers, header_generated, data_rows) = if options.has_headers {
6430 let header_row = &rows[0];
6431 let header_width = header_row.len();
6432 let provided_names = resolve_names(header_width)?;
6433 let (headers, header_generated): (Vec<_>, Vec<_>) = if let Some(names) = provided_names {
6434 (names, vec![false; header_width])
6435 } else {
6436 let header_pairs: Vec<(String, bool)> = header_row
6437 .iter()
6438 .enumerate()
6439 .map(|(i, cell)| match cell {
6440 calamine::Data::String(s) if !s.is_empty() => (s.clone(), false),
6441 _ => (format!("column_{i}"), true),
6442 })
6443 .collect();
6444 header_pairs.into_iter().unzip()
6445 };
6446 (headers, header_generated, &rows[1..])
6447 } else {
6448 let ncols = rows.iter().map(Vec::len).max().unwrap_or(0);
6449 let provided_names = resolve_names(ncols)?;
6450 let (headers, header_generated) = if let Some(names) = provided_names {
6451 (names, vec![false; ncols])
6452 } else {
6453 let headers: Vec<String> = (0..ncols).map(|i| format!("column_{i}")).collect();
6454 let header_generated = vec![true; ncols];
6455 (headers, header_generated)
6456 };
6457 (headers, header_generated, rows.as_slice())
6458 };
6459 reject_duplicate_headers(&headers)?;
6460
6461 if let Some(ref usecols) = options.usecols {
6462 validate_usecols(&headers, usecols)?;
6463 }
6464
6465 let ncols = headers.len();
6466
6467 let mut columns: Vec<Vec<Scalar>> = (0..ncols)
6469 .map(|_| Vec::with_capacity(data_rows.len()))
6470 .collect();
6471
6472 for row in data_rows {
6473 for (col_idx, col_vec) in columns.iter_mut().enumerate() {
6474 let cell = row.get(col_idx).unwrap_or(&calamine::Data::Empty);
6475 col_vec.push(excel_cell_to_scalar(cell));
6476 }
6477 }
6478
6479 let (headers, header_generated, columns) = if let Some(ref usecols) = options.usecols {
6480 let mut filtered_headers = Vec::new();
6481 let mut filtered_generated = Vec::new();
6482 let mut filtered_columns = Vec::new();
6483 for ((name, generated), values) in headers.into_iter().zip(header_generated).zip(columns) {
6484 if usecols.contains(&name) {
6485 filtered_headers.push(name);
6486 filtered_generated.push(generated);
6487 filtered_columns.push(values);
6488 }
6489 }
6490 (filtered_headers, filtered_generated, filtered_columns)
6491 } else {
6492 (headers, header_generated, columns)
6493 };
6494
6495 let index_col_idx = if let Some(ref idx_name) = options.index_col {
6497 let pos = headers.iter().position(|h| h == idx_name);
6498 if pos.is_none() {
6499 return Err(IoError::MissingIndexColumn(idx_name.clone()));
6500 }
6501 pos
6502 } else {
6503 infer_writer_emitted_default_excel_index_col(&headers, &header_generated, &columns, options)
6504 };
6505
6506 let index_name = index_col_idx.and_then(|idx_pos| {
6507 if !header_generated[idx_pos] {
6508 Some(headers[idx_pos].clone())
6509 } else {
6510 None
6511 }
6512 });
6513
6514 let mut out_columns = BTreeMap::new();
6515 let mut column_order = Vec::new();
6516
6517 for (idx, (name, values)) in headers.into_iter().zip(columns).enumerate() {
6518 if Some(idx) == index_col_idx {
6519 continue; }
6521 out_columns.insert(name.clone(), Column::from_values(values)?);
6522 column_order.push(name);
6523 }
6524
6525 let index = if let Some(idx_pos) = index_col_idx {
6526 let idx_labels: Vec<IndexLabel> = data_rows
6527 .iter()
6528 .map(|row| {
6529 let cell = row.get(idx_pos).unwrap_or(&calamine::Data::Empty);
6530 scalar_to_index_label(excel_cell_to_scalar(cell))
6531 })
6532 .collect();
6533 Index::new(idx_labels).set_names(index_name.as_deref())
6534 } else {
6535 Index::from_i64((0..data_rows.len() as i64).collect())
6536 };
6537
6538 Ok(DataFrame::new_with_column_order(
6539 index,
6540 out_columns,
6541 column_order,
6542 )?)
6543}
6544
6545pub fn read_excel(path: &Path, options: &ExcelReadOptions) -> Result<DataFrame, IoError> {
6549 use calamine::{Reader, open_workbook_auto};
6550
6551 let mut workbook = open_workbook_auto(path)
6552 .map_err(|e| IoError::Excel(format!("cannot open workbook: {e}")))?;
6553
6554 let sheet_name = if let Some(ref name) = options.sheet_name {
6555 name.clone()
6556 } else {
6557 let names = workbook.sheet_names();
6558 if names.is_empty() {
6559 return Err(IoError::Excel("workbook contains no sheets".into()));
6560 }
6561 names[0].clone()
6562 };
6563
6564 let range = workbook
6565 .worksheet_range(&sheet_name)
6566 .map_err(|e| IoError::Excel(format!("cannot read sheet '{sheet_name}': {e}")))?;
6567
6568 let rows: Vec<Vec<calamine::Data>> = range
6569 .rows()
6570 .skip(options.skip_rows)
6571 .map(|r| r.to_vec())
6572 .collect();
6573
6574 parse_excel_rows(rows, options)
6575}
6576
6577pub fn read_excel_with_index_cols(
6578 path: &Path,
6579 options: &ExcelReadOptions,
6580 index_cols: &[&str],
6581) -> Result<DataFrame, IoError> {
6582 let frame = read_excel(path, options)?;
6583 promote_frame_index_columns(&frame, index_cols)
6584}
6585
6586pub fn read_excel_bytes(data: &[u8], options: &ExcelReadOptions) -> Result<DataFrame, IoError> {
6588 use calamine::{Reader, open_workbook_auto_from_rs};
6589
6590 let cursor = std::io::Cursor::new(data);
6591 let mut workbook = open_workbook_auto_from_rs(cursor)
6592 .map_err(|e| IoError::Excel(format!("cannot open workbook from bytes: {e}")))?;
6593
6594 let sheet_name = if let Some(ref name) = options.sheet_name {
6595 name.clone()
6596 } else {
6597 let names = workbook.sheet_names();
6598 if names.is_empty() {
6599 return Err(IoError::Excel("workbook contains no sheets".into()));
6600 }
6601 names[0].clone()
6602 };
6603
6604 let range = workbook
6605 .worksheet_range(&sheet_name)
6606 .map_err(|e| IoError::Excel(format!("cannot read sheet '{sheet_name}': {e}")))?;
6607
6608 let rows: Vec<Vec<calamine::Data>> = range
6609 .rows()
6610 .skip(options.skip_rows)
6611 .map(|r| r.to_vec())
6612 .collect();
6613
6614 parse_excel_rows(rows, options)
6615}
6616
6617pub fn read_excel_bytes_with_index_cols(
6618 data: &[u8],
6619 options: &ExcelReadOptions,
6620 index_cols: &[&str],
6621) -> Result<DataFrame, IoError> {
6622 let frame = read_excel_bytes(data, options)?;
6623 promote_frame_index_columns(&frame, index_cols)
6624}
6625
6626pub fn read_excel_sheets_ordered(
6645 path: &Path,
6646 sheet_names: Option<&[String]>,
6647 options: &ExcelReadOptions,
6648) -> Result<Vec<(String, DataFrame)>, IoError> {
6649 use calamine::{Reader, open_workbook_auto};
6650
6651 let mut workbook = open_workbook_auto(path)
6652 .map_err(|e| IoError::Excel(format!("cannot open workbook: {e}")))?;
6653 let available: Vec<String> = workbook.sheet_names();
6654 let available_set: HashSet<&str> = available.iter().map(String::as_str).collect();
6657 let selected: Vec<String> = match sheet_names {
6658 Some(names) => {
6659 for name in names {
6660 if !available_set.contains(name.as_str()) {
6661 return Err(IoError::Excel(format!(
6662 "workbook does not contain sheet {name:?}"
6663 )));
6664 }
6665 }
6666 names.to_vec()
6667 }
6668 None => available.clone(),
6669 };
6670 if selected.is_empty() {
6671 return Err(IoError::Excel("no sheets selected".to_owned()));
6672 }
6673 let mut out = Vec::with_capacity(selected.len());
6674 for sheet in &selected {
6675 let range = workbook
6676 .worksheet_range(sheet)
6677 .map_err(|e| IoError::Excel(format!("cannot read sheet {sheet:?}: {e}")))?;
6678 let rows: Vec<Vec<calamine::Data>> = range
6679 .rows()
6680 .skip(options.skip_rows)
6681 .map(|r| r.to_vec())
6682 .collect();
6683 let frame = parse_excel_rows(rows, options)?;
6684 out.push((sheet.clone(), frame));
6685 }
6686 Ok(out)
6687}
6688
6689pub fn read_excel_sheets_ordered_bytes(
6691 data: &[u8],
6692 sheet_names: Option<&[String]>,
6693 options: &ExcelReadOptions,
6694) -> Result<Vec<(String, DataFrame)>, IoError> {
6695 use calamine::{Reader, open_workbook_auto_from_rs};
6696
6697 let cursor = std::io::Cursor::new(data);
6698 let mut workbook = open_workbook_auto_from_rs(cursor)
6699 .map_err(|e| IoError::Excel(format!("cannot open workbook from bytes: {e}")))?;
6700 let available: Vec<String> = workbook.sheet_names();
6701 let available_set: HashSet<&str> = available.iter().map(String::as_str).collect();
6704 let selected: Vec<String> = match sheet_names {
6705 Some(names) => {
6706 for name in names {
6707 if !available_set.contains(name.as_str()) {
6708 return Err(IoError::Excel(format!(
6709 "workbook does not contain sheet {name:?}"
6710 )));
6711 }
6712 }
6713 names.to_vec()
6714 }
6715 None => available.clone(),
6716 };
6717 if selected.is_empty() {
6718 return Err(IoError::Excel("no sheets selected".to_owned()));
6719 }
6720 let mut out = Vec::with_capacity(selected.len());
6721 for sheet in &selected {
6722 let range = workbook
6723 .worksheet_range(sheet)
6724 .map_err(|e| IoError::Excel(format!("cannot read sheet {sheet:?}: {e}")))?;
6725 let rows: Vec<Vec<calamine::Data>> = range
6726 .rows()
6727 .skip(options.skip_rows)
6728 .map(|r| r.to_vec())
6729 .collect();
6730 let frame = parse_excel_rows(rows, options)?;
6731 out.push((sheet.clone(), frame));
6732 }
6733 Ok(out)
6734}
6735
6736pub fn read_excel_sheets(
6737 path: &Path,
6738 sheet_names: Option<&[String]>,
6739 options: &ExcelReadOptions,
6740) -> Result<BTreeMap<String, DataFrame>, IoError> {
6741 use calamine::{Reader, open_workbook_auto};
6742
6743 let mut workbook = open_workbook_auto(path)
6744 .map_err(|e| IoError::Excel(format!("cannot open workbook: {e}")))?;
6745 let available: Vec<String> = workbook.sheet_names();
6746 let available_set: HashSet<&str> = available.iter().map(String::as_str).collect();
6749 let selected: Vec<String> = match sheet_names {
6750 Some(names) => {
6751 for name in names {
6752 if !available_set.contains(name.as_str()) {
6753 return Err(IoError::Excel(format!(
6754 "workbook does not contain sheet {name:?}"
6755 )));
6756 }
6757 }
6758 names.to_vec()
6759 }
6760 None => available.clone(),
6761 };
6762 if selected.is_empty() {
6763 return Err(IoError::Excel("no sheets selected".to_owned()));
6764 }
6765
6766 let mut out = BTreeMap::new();
6767 for sheet in &selected {
6768 let range = workbook
6769 .worksheet_range(sheet)
6770 .map_err(|e| IoError::Excel(format!("cannot read sheet {sheet:?}: {e}")))?;
6771 let rows: Vec<Vec<calamine::Data>> = range
6772 .rows()
6773 .skip(options.skip_rows)
6774 .map(|r| r.to_vec())
6775 .collect();
6776 let frame = parse_excel_rows(rows, options)?;
6777 out.insert(sheet.clone(), frame);
6778 }
6779 Ok(out)
6780}
6781
6782pub fn read_excel_sheets_bytes(
6786 data: &[u8],
6787 sheet_names: Option<&[String]>,
6788 options: &ExcelReadOptions,
6789) -> Result<BTreeMap<String, DataFrame>, IoError> {
6790 use calamine::{Reader, open_workbook_auto_from_rs};
6791
6792 let cursor = std::io::Cursor::new(data);
6793 let mut workbook = open_workbook_auto_from_rs(cursor)
6794 .map_err(|e| IoError::Excel(format!("cannot open workbook from bytes: {e}")))?;
6795 let available: Vec<String> = workbook.sheet_names();
6796 let available_set: HashSet<&str> = available.iter().map(String::as_str).collect();
6799 let selected: Vec<String> = match sheet_names {
6800 Some(names) => {
6801 for name in names {
6802 if !available_set.contains(name.as_str()) {
6803 return Err(IoError::Excel(format!(
6804 "workbook does not contain sheet {name:?}"
6805 )));
6806 }
6807 }
6808 names.to_vec()
6809 }
6810 None => available.clone(),
6811 };
6812 if selected.is_empty() {
6813 return Err(IoError::Excel("no sheets selected".to_owned()));
6814 }
6815
6816 let mut out = BTreeMap::new();
6817 for sheet in &selected {
6818 let range = workbook
6819 .worksheet_range(sheet)
6820 .map_err(|e| IoError::Excel(format!("cannot read sheet {sheet:?}: {e}")))?;
6821 let rows: Vec<Vec<calamine::Data>> = range
6822 .rows()
6823 .skip(options.skip_rows)
6824 .map(|r| r.to_vec())
6825 .collect();
6826 let frame = parse_excel_rows(rows, options)?;
6827 out.insert(sheet.clone(), frame);
6828 }
6829 Ok(out)
6830}
6831
6832pub fn write_excel(frame: &DataFrame, path: &Path) -> Result<(), IoError> {
6836 let bytes = write_excel_bytes(frame)?;
6837 std::fs::write(path, bytes)?;
6838 Ok(())
6839}
6840
6841fn write_excel_index_label(
6842 worksheet: &mut rust_xlsxwriter::Worksheet,
6843 excel_row: u32,
6844 excel_col: u16,
6845 label: &IndexLabel,
6846) -> Result<(), IoError> {
6847 match label {
6848 IndexLabel::Int64(v) => {
6849 worksheet
6850 .write_number(excel_row, excel_col, *v as f64)
6851 .map_err(|e| IoError::Excel(format!("write index int: {e}")))?;
6852 }
6853 IndexLabel::Utf8(s) => {
6854 worksheet
6855 .write_string(excel_row, excel_col, s.as_str())
6856 .map_err(|e| IoError::Excel(format!("write index string: {e}")))?;
6857 }
6858 IndexLabel::Timedelta64(v) => {
6859 if *v != Timedelta::NAT {
6860 worksheet
6861 .write_string(excel_row, excel_col, Timedelta::format(*v))
6862 .map_err(|e| IoError::Excel(format!("write index timedelta: {e}")))?;
6863 }
6864 }
6865 IndexLabel::Datetime64(v) => {
6866 if *v != i64::MIN {
6867 worksheet
6868 .write_string(excel_row, excel_col, label.to_string())
6869 .map_err(|e| IoError::Excel(format!("write index datetime: {e}")))?;
6870 }
6871 }
6872 IndexLabel::Null(_) => {}
6875 }
6876 Ok(())
6877}
6878
6879fn write_excel_scalar(
6880 worksheet: &mut rust_xlsxwriter::Worksheet,
6881 excel_row: u32,
6882 excel_col: u16,
6883 scalar: &Scalar,
6884) -> Result<(), IoError> {
6885 match scalar {
6886 Scalar::Int64(v) => {
6887 worksheet
6888 .write_number(excel_row, excel_col, *v as f64)
6889 .map_err(|e| IoError::Excel(format!("write int: {e}")))?;
6890 }
6891 Scalar::Float64(v) if !v.is_nan() => {
6892 worksheet
6893 .write_number(excel_row, excel_col, *v)
6894 .map_err(|e| IoError::Excel(format!("write float: {e}")))?;
6895 }
6896 Scalar::Bool(b) => {
6897 worksheet
6898 .write_boolean(excel_row, excel_col, *b)
6899 .map_err(|e| IoError::Excel(format!("write bool: {e}")))?;
6900 }
6901 Scalar::Utf8(s) => {
6902 worksheet
6903 .write_string(excel_row, excel_col, s.as_str())
6904 .map_err(|e| IoError::Excel(format!("write string: {e}")))?;
6905 }
6906 Scalar::Timedelta64(v) => {
6907 if *v != Timedelta::NAT {
6908 worksheet
6909 .write_string(excel_row, excel_col, Timedelta::format(*v))
6910 .map_err(|e| IoError::Excel(format!("write timedelta: {e}")))?;
6911 }
6912 }
6913 Scalar::Datetime64(v) => {
6914 if *v != Timestamp::NAT {
6915 worksheet
6916 .write_string(excel_row, excel_col, format_datetime_ns(*v))
6917 .map_err(|e| IoError::Excel(format!("write datetime: {e}")))?;
6918 }
6919 }
6920 Scalar::Period(v) => {
6921 if *v != i64::MIN {
6922 worksheet
6923 .write_string(excel_row, excel_col, format!("Period[{v}]"))
6924 .map_err(|e| IoError::Excel(format!("write period: {e}")))?;
6925 }
6926 }
6927 Scalar::Interval(iv) => {
6928 worksheet
6929 .write_string(excel_row, excel_col, format!("{iv}"))
6930 .map_err(|e| IoError::Excel(format!("write interval: {e}")))?;
6931 }
6932 Scalar::Float64(_) | Scalar::Null(_) => {}
6933 }
6934 Ok(())
6935}
6936
6937pub fn write_excel_bytes(frame: &DataFrame) -> Result<Vec<u8>, IoError> {
6942 write_excel_bytes_with_options(frame, &ExcelWriteOptions::default())
6943}
6944
/// Options controlling how a frame is written to a worksheet.
#[derive(Debug, Clone)]
pub struct ExcelWriteOptions {
    /// Name given to the worksheet.
    pub sheet_name: String,
    /// Whether the row index is written as the first column.
    pub index: bool,
    /// Header text for the index column; `None` falls back to the index name.
    pub index_label: Option<String>,
    /// Whether a header row is written.
    pub header: bool,
}

impl Default for ExcelWriteOptions {
    /// Writes to a sheet named `Sheet1`, with both the index column and the
    /// header row enabled and no explicit index label.
    fn default() -> Self {
        ExcelWriteOptions {
            sheet_name: String::from("Sheet1"),
            index_label: None,
            index: true,
            header: true,
        }
    }
}
6975
6976pub fn write_excel_bytes_with_options(
6983 frame: &DataFrame,
6984 options: &ExcelWriteOptions,
6985) -> Result<Vec<u8>, IoError> {
6986 if options.index && frame.row_multiindex().is_some() {
6987 let materialized = materialize_named_row_multiindex_columns(frame)?;
6988 let mut nested_options = options.clone();
6989 nested_options.index = false;
6990 nested_options.index_label = None;
6991 return write_excel_bytes_with_options(&materialized, &nested_options);
6992 }
6993
6994 use rust_xlsxwriter::Workbook;
6995
6996 let mut workbook = Workbook::new();
6997 let worksheet = workbook.add_worksheet();
6998 worksheet
6999 .set_name(options.sheet_name.as_str())
7000 .map_err(|e| IoError::Excel(format!("set sheet name: {e}")))?;
7001
7002 let col_names: Vec<String> = frame.column_names().into_iter().cloned().collect();
7003 let data_col_offset: u16 = if options.index { 1 } else { 0 };
7004
7005 if options.header {
7007 if options.index {
7008 let idx_header = options
7009 .index_label
7010 .as_deref()
7011 .unwrap_or_else(|| frame.index().name().unwrap_or(""));
7012 worksheet
7013 .write_string(0, 0, idx_header)
7014 .map_err(|e| IoError::Excel(format!("write index header: {e}")))?;
7015 }
7016 for (col_idx, name) in col_names.iter().enumerate() {
7017 worksheet
7018 .write_string(0, data_col_offset + col_idx as u16, name.as_str())
7019 .map_err(|e| IoError::Excel(format!("write header: {e}")))?;
7020 }
7021 }
7022
7023 let header_rows: u32 = if options.header { 1 } else { 0 };
7026 let nrows = frame.index().len();
7027 for row_idx in 0..nrows {
7028 let excel_row = row_idx as u32 + header_rows;
7029 if options.index
7030 && let Some(label) = frame.index().labels().get(row_idx)
7031 {
7032 write_excel_index_label(worksheet, excel_row, 0, label)?;
7033 }
7034 for (col_idx, name) in col_names.iter().enumerate() {
7035 if let Some(col) = frame.column(name)
7036 && let Some(scalar) = col.value(row_idx)
7037 {
7038 write_excel_scalar(
7039 worksheet,
7040 excel_row,
7041 data_col_offset + col_idx as u16,
7042 scalar,
7043 )?;
7044 }
7045 }
7046 }
7047
7048 let buf = workbook
7049 .save_to_buffer()
7050 .map_err(|e| IoError::Excel(format!("save workbook: {e}")))?;
7051
7052 Ok(buf)
7053}
7054
7055pub fn write_excel_with_options(
7057 frame: &DataFrame,
7058 path: &Path,
7059 options: &ExcelWriteOptions,
7060) -> Result<(), IoError> {
7061 let bytes = write_excel_bytes_with_options(frame, options)?;
7062 std::fs::write(path, bytes)?;
7063 Ok(())
7064}
7065
7066pub fn write_feather_bytes(frame: &DataFrame) -> Result<Vec<u8>, IoError> {
7073 use arrow::ipc::writer::FileWriter;
7074
7075 let batch = dataframe_to_record_batch(frame)?;
7076 let schema = batch.schema();
7077
7078 let mut buf = Vec::new();
7079 let mut writer =
7080 FileWriter::try_new(&mut buf, &schema).map_err(|e| IoError::Arrow(e.to_string()))?;
7081 writer
7082 .write(&batch)
7083 .map_err(|e| IoError::Arrow(e.to_string()))?;
7084 writer.finish().map_err(|e| IoError::Arrow(e.to_string()))?;
7085 Ok(buf)
7086}
7087
7088pub fn read_feather_bytes(data: &[u8]) -> Result<DataFrame, IoError> {
7092 use arrow::ipc::reader::FileReader;
7093
7094 let cursor = std::io::Cursor::new(data);
7095 let reader = FileReader::try_new(cursor, None).map_err(|e| IoError::Arrow(e.to_string()))?;
7096
7097 let mut all_frames: Vec<DataFrame> = Vec::new();
7098 for batch_result in reader {
7099 let batch = batch_result.map_err(|e| IoError::Arrow(e.to_string()))?;
7100 all_frames.push(record_batch_to_dataframe(&batch)?);
7101 }
7102
7103 if all_frames.is_empty() {
7104 return Ok(DataFrame::new_with_column_order(
7105 Index::new(vec![]),
7106 BTreeMap::new(),
7107 vec![],
7108 )?);
7109 }
7110
7111 if all_frames.len() == 1 {
7112 if let Some(frame) = all_frames.into_iter().next() {
7113 return Ok(frame);
7114 }
7115 return Err(IoError::Arrow(
7116 "feather reader produced zero record batches".to_owned(),
7117 ));
7118 }
7119
7120 let refs: Vec<&DataFrame> = all_frames.iter().collect();
7121 fp_frame::concat_dataframes(&refs).map_err(IoError::from)
7122}
7123
7124pub fn write_feather(frame: &DataFrame, path: &Path) -> Result<(), IoError> {
7128 let bytes = write_feather_bytes(frame)?;
7129 std::fs::write(path, bytes)?;
7130 Ok(())
7131}
7132
7133pub fn read_feather(path: &Path) -> Result<DataFrame, IoError> {
7137 let data = std::fs::read(path)?;
7138 read_feather_bytes(&data)
7139}
7140
7141pub fn write_ipc_stream_bytes(frame: &DataFrame) -> Result<Vec<u8>, IoError> {
7146 use arrow::ipc::writer::StreamWriter;
7147
7148 let batch = dataframe_to_record_batch(frame)?;
7149 let schema = batch.schema();
7150
7151 let mut buf = Vec::new();
7152 let mut writer =
7153 StreamWriter::try_new(&mut buf, &schema).map_err(|e| IoError::Arrow(e.to_string()))?;
7154 writer
7155 .write(&batch)
7156 .map_err(|e| IoError::Arrow(e.to_string()))?;
7157 writer.finish().map_err(|e| IoError::Arrow(e.to_string()))?;
7158 Ok(buf)
7159}
7160
7161pub fn read_ipc_stream_bytes(data: &[u8]) -> Result<DataFrame, IoError> {
7163 use arrow::ipc::reader::StreamReader;
7164
7165 let cursor = std::io::Cursor::new(data);
7166 let reader = StreamReader::try_new(cursor, None).map_err(|e| IoError::Arrow(e.to_string()))?;
7167
7168 let mut all_frames: Vec<DataFrame> = Vec::new();
7169 for batch_result in reader {
7170 let batch = batch_result.map_err(|e| IoError::Arrow(e.to_string()))?;
7171 all_frames.push(record_batch_to_dataframe(&batch)?);
7172 }
7173
7174 if all_frames.is_empty() {
7175 return Ok(DataFrame::new_with_column_order(
7176 Index::new(vec![]),
7177 BTreeMap::new(),
7178 vec![],
7179 )?);
7180 }
7181
7182 if all_frames.len() == 1 {
7183 if let Some(frame) = all_frames.into_iter().next() {
7184 return Ok(frame);
7185 }
7186 return Err(IoError::Arrow(
7187 "ipc stream reader produced zero record batches".to_owned(),
7188 ));
7189 }
7190
7191 let refs: Vec<&DataFrame> = all_frames.iter().collect();
7192 fp_frame::concat_dataframes(&refs).map_err(IoError::from)
7193}
7194
/// Policy selector carried in [`SqlWriteOptions::if_exists`].
///
/// NOTE(review): the variant names suggest pandas' `to_sql(if_exists=...)`
/// semantics; the code that interprets them is outside this section, so
/// confirm the exact behavior at the writer.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum SqlIfExists {
    /// Presumably: refuse to write when the target table already exists.
    Fail,
    /// Presumably: drop/recreate the target table before writing.
    Replace,
    /// Presumably: insert into the existing target table.
    Append,
}
7207
/// Insert strategy selector carried in [`SqlWriteOptions::method`].
///
/// `Single` is the `Default`.
///
/// NOTE(review): how each variant shapes the generated INSERT statements is
/// decided by the writer, which is outside this section.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum SqlInsertMethod {
    /// Default strategy (presumably one row per INSERT statement).
    #[default]
    Single,
    /// Alternative strategy (presumably multi-row `VALUES` lists).
    Multi,
}
7229
/// Options accepted by the SQL read helpers.
///
/// The fields are consumed by readers outside this section; the per-field
/// notes below are hedged where the consuming code is not visible here.
#[derive(Debug, Clone)]
pub struct SqlReadOptions {
    /// Optional positional bind parameters (presumably forwarded to
    /// [`SqlConnection::query`]).
    pub params: Option<Vec<Scalar>>,
    /// Optional list of column names to parse as dates.
    pub parse_dates: Option<Vec<String>>,
    /// Float-coercion switch; defaults to `true`.
    pub coerce_float: bool,
    /// Optional per-column dtype overrides keyed by column name.
    pub dtype: Option<BTreeMap<String, DType>>,
    /// Optional schema name qualifying the source table.
    pub schema: Option<String>,
    /// Optional column selection.
    pub columns: Option<Vec<String>>,
    /// Optional name of the result column to use as the frame index.
    pub index_col: Option<String>,
}

impl Default for SqlReadOptions {
    /// Every optional field starts as `None`; `coerce_float` starts enabled.
    fn default() -> Self {
        Self {
            params: None,
            parse_dates: None,
            coerce_float: true,
            dtype: None,
            schema: None,
            columns: None,
            index_col: None,
        }
    }
}
7318
/// Options accepted by the SQL write helpers.
///
/// The fields are consumed by writers outside this section; the per-field
/// notes are hedged where the consuming code is not visible here.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct SqlWriteOptions {
    /// Policy applied when the target table already exists.
    pub if_exists: SqlIfExists,
    /// Whether the frame index is written as a column.
    pub index: bool,
    /// Optional column name for the written index.
    pub index_label: Option<String>,
    /// Optional schema name qualifying the target table.
    pub schema: Option<String>,
    /// Optional per-column SQL type overrides (column name -> SQL type text).
    pub dtype: Option<BTreeMap<String, String>>,
    /// Insert strategy.
    pub method: SqlInsertMethod,
    /// Optional batch size for inserts (presumably rows per batch; confirm
    /// against the writer).
    pub chunksize: Option<usize>,
}
7375
/// Fully materialized result of [`SqlConnection::query`].
#[derive(Debug, Clone, PartialEq)]
pub struct SqlQueryResult {
    /// Result column names, in result order.
    pub columns: Vec<String>,
    /// Row-major cell values; each inner vector is one row.
    pub rows: Vec<Vec<Scalar>>,
}
7382
/// Optional dtype hint per result column (`None` = no hint for that column).
type SqlColumnDtypeHints = Vec<Option<DType>>;
/// Column-major query result: `(headers, columns, dtype_hints)`.
type SqlMaterializedColumns = (Vec<String>, Vec<Vec<Scalar>>, SqlColumnDtypeHints);
7385
/// Reflected metadata for one table column.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct SqlColumnSchema {
    /// Column name.
    pub name: String,
    /// Declared SQL type text, when the backend reports one.
    pub declared_type: Option<String>,
    /// `true` when the column accepts NULL.
    pub nullable: bool,
    /// Default-value expression text, if any.
    pub default_value: Option<String>,
    /// Zero-based position within the primary key, or `None` when the column
    /// is not part of it (the SQLite backend stores `pk - 1`).
    pub primary_key_ordinal: Option<usize>,
    /// Column comment, when the backend exposes one.
    pub comment: Option<String>,
    /// Whether the backend considers the column auto-incrementing.
    pub autoincrement: bool,
}
7425
7426#[derive(Debug, Clone, PartialEq, Eq)]
7428pub struct SqlTableSchema {
7429 pub table_name: String,
7430 pub columns: Vec<SqlColumnSchema>,
7431}
7432
7433impl SqlTableSchema {
7434 pub fn column(&self, name: &str) -> Option<&SqlColumnSchema> {
7435 self.columns.iter().find(|column| column.name == name)
7436 }
7437}
7438
/// Reflected metadata for one table index.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct SqlIndexSchema {
    /// Index name.
    pub name: String,
    /// Indexed column names in index order.
    pub columns: Vec<String>,
    /// `true` when the index enforces uniqueness.
    pub unique: bool,
}
7450
/// Reflected metadata for one UNIQUE constraint.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct SqlUniqueConstraintSchema {
    /// Constraint name (for SQLite, the name of the backing index).
    pub name: String,
    /// Constrained column names in constraint order.
    pub columns: Vec<String>,
}
7465
7466#[derive(Debug, Clone, PartialEq, Eq)]
7474pub struct SqlReflectedTable {
7475 pub table_name: String,
7476 pub columns: Vec<SqlColumnSchema>,
7477 pub primary_key_columns: Vec<String>,
7478 pub indexes: Vec<SqlIndexSchema>,
7479 pub foreign_keys: Vec<SqlForeignKeySchema>,
7480 pub unique_constraints: Vec<SqlUniqueConstraintSchema>,
7481 pub comment: Option<String>,
7482}
7483
7484impl SqlReflectedTable {
7485 #[must_use]
7489 pub fn column(&self, name: &str) -> Option<&SqlColumnSchema> {
7490 self.columns.iter().find(|c| c.name == name)
7491 }
7492
7493 #[must_use]
7497 pub fn index(&self, name: &str) -> Option<&SqlIndexSchema> {
7498 self.indexes.iter().find(|i| i.name == name)
7499 }
7500
7501 #[must_use]
7507 pub fn unique_constraint(&self, name: &str) -> Option<&SqlUniqueConstraintSchema> {
7508 self.unique_constraints.iter().find(|u| u.name == name)
7509 }
7510
7511 #[must_use]
7520 pub fn foreign_keys_for_column(&self, column_name: &str) -> Vec<&SqlForeignKeySchema> {
7521 self.foreign_keys
7522 .iter()
7523 .filter(|fk| fk.columns.iter().any(|c| c == column_name))
7524 .collect()
7525 }
7526
7527 #[must_use]
7536 pub fn indexes_for_column(&self, column_name: &str) -> Vec<&SqlIndexSchema> {
7537 self.indexes
7538 .iter()
7539 .filter(|i| i.columns.iter().any(|c| c == column_name))
7540 .collect()
7541 }
7542
7543 #[must_use]
7551 pub fn unique_constraints_for_column(
7552 &self,
7553 column_name: &str,
7554 ) -> Vec<&SqlUniqueConstraintSchema> {
7555 self.unique_constraints
7556 .iter()
7557 .filter(|u| u.columns.iter().any(|c| c == column_name))
7558 .collect()
7559 }
7560}
7561
/// Reflected metadata for one foreign-key constraint.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct SqlForeignKeySchema {
    /// Constraint name, when the backend reports one (the SQLite backend
    /// always stores `None`).
    pub constraint_name: Option<String>,
    /// Local (referencing) column names, in key order.
    pub columns: Vec<String>,
    /// Name of the referenced table.
    pub referenced_table: String,
    /// Referenced column names, positionally matching `columns`.
    pub referenced_columns: Vec<String>,
}
7579
/// Iterator that yields a SQL query result as a sequence of `DataFrame`
/// chunks.
///
/// The `'conn` lifetime ties paged iteration to the borrowed connection.
pub struct SqlChunkIterator<'conn> {
    // Current iteration mode plus its cursor state.
    state: SqlChunkIteratorState<'conn>,
}

/// Internal iteration modes of [`SqlChunkIterator`].
enum SqlChunkIteratorState<'conn> {
    /// The full result is already in memory (column-major) and is sliced
    /// into chunks on demand.
    Materialized {
        // Result column names.
        headers: Vec<String>,
        // Column-major cell values; row count is taken from the first column.
        columns: Vec<Vec<Scalar>>,
        // Per-column dtype hints handed to every chunk's frame construction.
        dtype_hints: SqlColumnDtypeHints,
        // Maximum number of rows per yielded chunk.
        chunk_size: usize,
        // Index of the first row of the next chunk.
        next_row: usize,
    },
    /// Each chunk is fetched from the connection with its own paged query.
    Paged {
        // Connection used to run each page query.
        conn: &'conn dyn SqlConnection,
        // Query text after `sql_trim_chunk_source` processing.
        query: String,
        // Read options applied to every page.
        options: SqlReadOptions,
        // Result column names, resolved once up front.
        headers: Vec<String>,
        // Maximum number of rows requested per page.
        chunk_size: usize,
        // Row offset of the next page.
        next_offset: usize,
        // Set once a short/empty page or an error has been observed.
        finished: bool,
    },
}
7603
7604impl std::fmt::Debug for SqlChunkIterator<'_> {
7605 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
7606 match &self.state {
7607 SqlChunkIteratorState::Materialized {
7608 headers,
7609 columns,
7610 dtype_hints: _,
7611 chunk_size,
7612 next_row,
7613 } => f
7614 .debug_struct("SqlChunkIterator")
7615 .field("mode", &"materialized")
7616 .field("headers", headers)
7617 .field("row_count", &columns.first().map_or(0, Vec::len))
7618 .field("chunk_size", chunk_size)
7619 .field("next_row", next_row)
7620 .finish(),
7621 SqlChunkIteratorState::Paged {
7622 query,
7623 headers,
7624 chunk_size,
7625 next_offset,
7626 finished,
7627 ..
7628 } => f
7629 .debug_struct("SqlChunkIterator")
7630 .field("mode", &"paged")
7631 .field("query", query)
7632 .field("headers", headers)
7633 .field("chunk_size", chunk_size)
7634 .field("next_offset", next_offset)
7635 .field("finished", finished)
7636 .finish(),
7637 }
7638 }
7639}
7640
7641impl<'conn> SqlChunkIterator<'conn> {
7642 fn materialized(
7643 headers: Vec<String>,
7644 columns: Vec<Vec<Scalar>>,
7645 dtype_hints: SqlColumnDtypeHints,
7646 chunk_size: usize,
7647 ) -> Self {
7648 Self {
7649 state: SqlChunkIteratorState::Materialized {
7650 headers,
7651 columns,
7652 dtype_hints,
7653 chunk_size,
7654 next_row: 0,
7655 },
7656 }
7657 }
7658
7659 fn paged<C: SqlConnection + 'conn>(
7660 conn: &'conn C,
7661 query: &str,
7662 options: &SqlReadOptions,
7663 chunk_size: usize,
7664 ) -> Result<Self, IoError> {
7665 let headers = sql_paged_query_headers(conn, query, options)?;
7666 Ok(Self {
7667 state: SqlChunkIteratorState::Paged {
7668 conn,
7669 query: sql_trim_chunk_source(query)?.to_owned(),
7670 options: options.clone(),
7671 headers,
7672 chunk_size,
7673 next_offset: 0,
7674 finished: false,
7675 },
7676 })
7677 }
7678
7679 fn headers(&self) -> &[String] {
7680 match &self.state {
7681 SqlChunkIteratorState::Materialized { headers, .. }
7682 | SqlChunkIteratorState::Paged { headers, .. } => headers,
7683 }
7684 }
7685}
7686
7687impl Iterator for SqlChunkIterator<'_> {
7688 type Item = Result<DataFrame, IoError>;
7689
7690 fn next(&mut self) -> Option<Self::Item> {
7691 match &mut self.state {
7692 SqlChunkIteratorState::Materialized {
7693 headers,
7694 columns,
7695 dtype_hints,
7696 chunk_size,
7697 next_row,
7698 } => {
7699 let row_count = columns.first().map_or(0, Vec::len);
7700 if *next_row >= row_count {
7701 return None;
7702 }
7703
7704 let start = *next_row;
7705 let end = start.saturating_add(*chunk_size).min(row_count);
7706 *next_row = end;
7707
7708 let chunk_columns = columns
7709 .iter()
7710 .map(|column| column[start..end].to_vec())
7711 .collect();
7712 Some(dataframe_from_sql_columns(
7713 headers.clone(),
7714 chunk_columns,
7715 dtype_hints.clone(),
7716 ))
7717 }
7718 SqlChunkIteratorState::Paged {
7719 conn,
7720 query,
7721 options,
7722 chunk_size,
7723 next_offset,
7724 finished,
7725 ..
7726 } => {
7727 if *finished {
7728 return None;
7729 }
7730
7731 let page =
7732 sql_query_to_columns_paged(*conn, query, options, *chunk_size, *next_offset);
7733 Some(match page {
7734 Ok((headers, columns, dtype_hints)) => {
7735 let row_count = columns.first().map_or(0, Vec::len);
7736 if row_count == 0 {
7737 *finished = true;
7738 return None;
7739 }
7740 if row_count < *chunk_size {
7741 *finished = true;
7742 }
7743 *next_offset = next_offset.saturating_add(row_count);
7744 dataframe_from_sql_columns(headers, columns, dtype_hints)
7745 }
7746 Err(err) => {
7747 *finished = true;
7748 Err(err)
7749 }
7750 })
7751 }
7752 }
7753 }
7754}
7755
/// Wraps a [`SqlChunkIterator`] and optionally promotes one result column to
/// the index of every yielded chunk.
pub struct SqlIndexedChunkIterator<'conn> {
    // Underlying chunk source.
    inner: SqlChunkIterator<'conn>,
    // Column to promote to the index; `None` passes chunks through untouched.
    index_col: Option<String>,
}

/// Debug rendering that delegates to the inner iterator's compact form.
impl std::fmt::Debug for SqlIndexedChunkIterator<'_> {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.debug_struct("SqlIndexedChunkIterator")
            .field("inner", &self.inner)
            .field("index_col", &self.index_col)
            .finish()
    }
}
7770
7771impl Iterator for SqlIndexedChunkIterator<'_> {
7772 type Item = Result<DataFrame, IoError>;
7773
7774 fn next(&mut self) -> Option<Self::Item> {
7775 let chunk = self.inner.next()?;
7776 Some(match (chunk, self.index_col.as_deref()) {
7777 (Ok(frame), Some(index_col)) => apply_sql_index_col(frame, Some(index_col)),
7778 (Ok(frame), None) => Ok(frame),
7779 (Err(err), _) => Err(err),
7780 })
7781 }
7782}
7783
7784fn sql_indexed_chunks<'conn>(
7785 inner: SqlChunkIterator<'conn>,
7786 index_col: Option<&str>,
7787) -> Result<SqlIndexedChunkIterator<'conn>, IoError> {
7788 if let Some(col_name) = index_col {
7789 if col_name.is_empty() {
7790 return Err(IoError::Sql(
7791 "index_col: empty string is not a valid column name".to_owned(),
7792 ));
7793 }
7794 if !inner.headers().iter().any(|header| header == col_name) {
7795 return Err(IoError::Sql(format!(
7796 "index_col {col_name:?} not present in result columns"
7797 )));
7798 }
7799 }
7800 Ok(SqlIndexedChunkIterator {
7801 inner,
7802 index_col: index_col.map(str::to_owned),
7803 })
7804}
7805
7806pub trait SqlConnection {
7808 fn query(&self, query: &str, params: &[Scalar]) -> Result<SqlQueryResult, IoError>;
7809
7810 fn query_column_dtypes(
7816 &self,
7817 _query: &str,
7818 _params: &[Scalar],
7819 ) -> Result<Vec<Option<DType>>, IoError> {
7820 Ok(Vec::new())
7821 }
7822
7823 fn supports_paged_sql_chunks(&self) -> bool {
7828 false
7829 }
7830
7831 fn execute_batch(&self, sql: &str) -> Result<(), IoError>;
7832
7833 fn table_exists(&self, table_name: &str) -> Result<bool, IoError>;
7834
7835 fn insert_rows(&self, insert_sql: &str, rows: &[Vec<Scalar>]) -> Result<(), IoError>;
7836
7837 fn dtype_sql(&self, dtype: DType) -> &'static str;
7838
7839 fn index_dtype_sql(&self, index: &Index) -> &'static str;
7840
7841 fn parameter_marker(&self, _ordinal: usize) -> String {
7847 "?".to_owned()
7848 }
7849
7850 fn dialect_name(&self) -> &'static str {
7865 "unknown"
7866 }
7867
7868 fn supports_returning(&self) -> bool {
7875 false
7876 }
7877
7878 fn max_param_count(&self) -> Option<usize> {
7885 None
7886 }
7887
7888 fn max_identifier_length(&self) -> Option<usize> {
7899 None
7900 }
7901
7902 fn with_transaction<T, F>(&self, f: F) -> Result<T, IoError>
7912 where
7913 F: FnOnce(&Self) -> Result<T, IoError>,
7914 Self: Sized,
7915 {
7916 f(self)
7917 }
7918
7919 fn quote_identifier(&self, ident: &str) -> Result<String, IoError> {
7931 if ident.contains('\0') {
7932 return Err(IoError::Sql("invalid SQL identifier: NUL byte".to_owned()));
7933 }
7934 Ok(format!("\"{}\"", ident.replace('"', "\"\"")))
7935 }
7936
7937 fn supports_schemas(&self) -> bool {
7945 false
7946 }
7947
7948 fn default_schema(&self) -> Option<String> {
7958 None
7959 }
7960
7961 fn table_exists_in_schema(
7974 &self,
7975 table_name: &str,
7976 _schema: Option<&str>,
7977 ) -> Result<bool, IoError> {
7978 self.table_exists(table_name)
7979 }
7980
7981 fn list_tables(&self, _schema: Option<&str>) -> Result<Vec<String>, IoError> {
7990 Ok(Vec::new())
7991 }
7992
7993 fn table_schema(
8002 &self,
8003 _table_name: &str,
8004 _schema: Option<&str>,
8005 ) -> Result<Option<SqlTableSchema>, IoError> {
8006 Ok(None)
8007 }
8008
8009 fn list_schemas(&self) -> Result<Vec<String>, IoError> {
8019 Ok(Vec::new())
8020 }
8021
8022 fn server_version(&self) -> Result<Option<String>, IoError> {
8033 Ok(None)
8034 }
8035
8036 fn list_views(&self, _schema: Option<&str>) -> Result<Vec<String>, IoError> {
8047 Ok(Vec::new())
8048 }
8049
8050 fn list_indexes(
8060 &self,
8061 _table_name: &str,
8062 _schema: Option<&str>,
8063 ) -> Result<Vec<SqlIndexSchema>, IoError> {
8064 Ok(Vec::new())
8065 }
8066
8067 fn list_unique_constraints(
8077 &self,
8078 _table_name: &str,
8079 _schema: Option<&str>,
8080 ) -> Result<Vec<SqlUniqueConstraintSchema>, IoError> {
8081 Ok(Vec::new())
8082 }
8083
8084 fn table_comment(
8096 &self,
8097 _table_name: &str,
8098 _schema: Option<&str>,
8099 ) -> Result<Option<String>, IoError> {
8100 Ok(None)
8101 }
8102
8103 fn list_foreign_keys(
8114 &self,
8115 _table_name: &str,
8116 _schema: Option<&str>,
8117 ) -> Result<Vec<SqlForeignKeySchema>, IoError> {
8118 Ok(Vec::new())
8119 }
8120
8121 fn primary_key_columns(
8136 &self,
8137 table_name: &str,
8138 schema: Option<&str>,
8139 ) -> Result<Vec<String>, IoError> {
8140 let Some(meta) = self.table_schema(table_name, schema)? else {
8144 return Ok(Vec::new());
8145 };
8146 Ok(primary_keys_from_schema(&meta))
8147 }
8148
8149 fn truncate_table(&self, table_name: &str, schema: Option<&str>) -> Result<(), IoError> {
8161 validate_sql_table_name(table_name)?;
8162 validate_sql_table_ref_identifier_lengths(self, table_name, schema)?;
8163 let qualified = match schema {
8164 Some(s) if self.supports_schemas() => {
8165 validate_sql_schema_name(s)?;
8166 format!(
8167 "{}.{}",
8168 self.quote_identifier(s)?,
8169 self.quote_identifier(table_name)?
8170 )
8171 }
8172 _ => self.quote_identifier(table_name)?,
8173 };
8174 self.execute_batch(&format!("DELETE FROM {qualified}"))
8175 }
8176}
8177
/// Maps a frame dtype to the SQLite column type used to store it.
///
/// Integers, booleans and the integer-backed temporal dtypes are stored as
/// `INTEGER`, floats as `REAL`, and everything else as `TEXT`.
#[cfg(feature = "sql-sqlite")]
fn dtype_to_sql(dtype: DType) -> &'static str {
    match dtype {
        DType::Int64
        | DType::Int64Nullable
        | DType::Bool
        | DType::BoolNullable
        | DType::Timedelta64
        | DType::Datetime64
        | DType::Period => "INTEGER",
        DType::Float64 => "REAL",
        DType::Utf8 | DType::Categorical | DType::Null | DType::Interval | DType::Sparse => "TEXT",
    }
}
8195
/// Derives a dtype hint from a SQLite declared column type.
///
/// Matching is by case-insensitive substring, checked in this order:
/// `INT` gives `Int64`; `REAL`/`FLOA`/`DOUB` give `Float64`;
/// `CHAR`/`CLOB`/`TEXT` give `Utf8`. Anything else yields `None`.
#[cfg(feature = "sql-sqlite")]
fn sqlite_decl_type_to_dtype(decl_type: &str) -> Option<DType> {
    let normalized = decl_type.trim().to_ascii_uppercase();
    let mentions_any =
        |needles: &[&str]| needles.iter().any(|needle| normalized.contains(*needle));

    if mentions_any(&["INT"]) {
        return Some(DType::Int64);
    }
    if mentions_any(&["REAL", "FLOA", "DOUB"]) {
        return Some(DType::Float64);
    }
    if mentions_any(&["CHAR", "CLOB", "TEXT"]) {
        return Some(DType::Utf8);
    }
    None
}
8209
/// Converts a SQLite cell value into a frame [`Scalar`].
///
/// Blobs are not carried over: they become a `"<blob:N bytes>"` placeholder
/// string recording only the blob's length.
#[cfg(feature = "sql-sqlite")]
fn sql_value_to_scalar(value: &rusqlite::types::Value) -> Scalar {
    use rusqlite::types::Value;

    match value {
        Value::Integer(number) => Scalar::Int64(*number),
        Value::Real(number) => Scalar::Float64(*number),
        Value::Text(text) => Scalar::Utf8(text.clone()),
        Value::Blob(bytes) => {
            let byte_len = bytes.len();
            Scalar::Utf8(format!("<blob:{} bytes>", byte_len))
        }
        Value::Null => Scalar::Null(NullKind::Null),
    }
}
8221
/// Converts a frame [`Scalar`] into the SQLite value bound for it.
///
/// Missing markers all map to SQL `NULL`: explicit nulls, float NaN,
/// `Timedelta::NAT`, `Timestamp::NAT`, and `i64::MIN` for periods. Booleans
/// are stored as `0`/`1`, and intervals as their display text.
#[cfg(feature = "sql-sqlite")]
fn sql_value_from_scalar(scalar: &Scalar) -> rusqlite::types::Value {
    use rusqlite::types::Value as SqlValue;

    match scalar {
        Scalar::Null(_) => SqlValue::Null,

        Scalar::Float64(v) if v.is_nan() => SqlValue::Null,
        Scalar::Float64(v) => SqlValue::Real(*v),

        Scalar::Timedelta64(v) if *v == Timedelta::NAT => SqlValue::Null,
        Scalar::Timedelta64(v) => SqlValue::Integer(*v),

        Scalar::Datetime64(v) if *v == Timestamp::NAT => SqlValue::Null,
        Scalar::Datetime64(v) => SqlValue::Integer(*v),

        Scalar::Period(v) if *v == i64::MIN => SqlValue::Null,
        Scalar::Period(v) => SqlValue::Integer(*v),

        Scalar::Int64(v) => SqlValue::Integer(*v),
        Scalar::Bool(flag) => SqlValue::Integer(if *flag { 1 } else { 0 }),
        Scalar::Utf8(text) => SqlValue::Text(text.clone()),
        Scalar::Interval(iv) => SqlValue::Text(format!("{iv}")),
    }
}
8260
8261fn scalar_from_index_label(label: &IndexLabel) -> Scalar {
8262 match label {
8263 IndexLabel::Int64(v) => Scalar::Int64(*v),
8264 IndexLabel::Utf8(s) => Scalar::Utf8(s.clone()),
8265 IndexLabel::Null(kind) => Scalar::Null(*kind),
8267 IndexLabel::Timedelta64(v) => {
8268 if *v == Timedelta::NAT {
8269 Scalar::Null(NullKind::Null)
8270 } else {
8271 Scalar::Timedelta64(*v)
8272 }
8273 }
8274 IndexLabel::Datetime64(v) => {
8275 if *v == i64::MIN {
8276 Scalar::Null(NullKind::Null)
8277 } else {
8278 Scalar::Utf8(format_datetime_ns(*v))
8279 }
8280 }
8281 }
8282}
8283
8284#[cfg(feature = "sql-sqlite")]
8285impl SqlConnection for rusqlite::Connection {
8286 fn query(&self, query: &str, params: &[Scalar]) -> Result<SqlQueryResult, IoError> {
8287 let mut stmt = self
8288 .prepare(query)
8289 .map_err(|e| IoError::Sql(format!("prepare failed: {e}")))?;
8290
8291 let col_count = stmt.column_count();
8292 let columns: Vec<String> = (0..col_count)
8293 .map(|i| stmt.column_name(i).unwrap_or("?").to_owned())
8294 .collect();
8295
8296 let sql_params = params.iter().map(sql_value_from_scalar).collect::<Vec<_>>();
8297 let mut rows = stmt
8298 .query(rusqlite::params_from_iter(sql_params.iter()))
8299 .map_err(|e| IoError::Sql(format!("query failed: {e}")))?;
8300
8301 let mut out_rows = Vec::new();
8302 while let Some(row) = rows
8303 .next()
8304 .map_err(|e| IoError::Sql(format!("row fetch failed: {e}")))?
8305 {
8306 let mut values = Vec::with_capacity(col_count);
8307 for col_idx in 0..col_count {
8308 let value: rusqlite::types::Value = row
8309 .get(col_idx)
8310 .map_err(|e| IoError::Sql(format!("cell read failed: {e}")))?;
8311 values.push(sql_value_to_scalar(&value));
8312 }
8313 out_rows.push(values);
8314 }
8315
8316 Ok(SqlQueryResult {
8317 columns,
8318 rows: out_rows,
8319 })
8320 }
8321
8322 fn query_column_dtypes(
8323 &self,
8324 query: &str,
8325 _params: &[Scalar],
8326 ) -> Result<Vec<Option<DType>>, IoError> {
8327 let stmt = self
8328 .prepare(query)
8329 .map_err(|e| IoError::Sql(format!("prepare failed: {e}")))?;
8330 Ok(stmt
8331 .columns()
8332 .into_iter()
8333 .map(|column| column.decl_type().and_then(sqlite_decl_type_to_dtype))
8334 .collect())
8335 }
8336
    /// SQLite opts in to paged chunk reads (one query per chunk).
    fn supports_paged_sql_chunks(&self) -> bool {
        true
    }
8340
8341 fn execute_batch(&self, sql: &str) -> Result<(), IoError> {
8342 rusqlite::Connection::execute_batch(self, sql)
8343 .map_err(|e| IoError::Sql(format!("execute_batch failed: {e}")))
8344 }
8345
8346 fn table_exists(&self, table_name: &str) -> Result<bool, IoError> {
8347 self.prepare("SELECT 1 FROM sqlite_master WHERE type='table' AND name=?1")
8348 .and_then(|mut stmt| stmt.exists(rusqlite::params![table_name]))
8349 .map_err(|e| IoError::Sql(format!("existence check failed: {e}")))
8350 }
8351
8352 fn insert_rows(&self, insert_sql: &str, rows: &[Vec<Scalar>]) -> Result<(), IoError> {
8353 let tx = self
8354 .unchecked_transaction()
8355 .map_err(|e| IoError::Sql(format!("begin transaction failed: {e}")))?;
8356
8357 {
8358 let mut stmt = tx
8359 .prepare_cached(insert_sql)
8360 .map_err(|e| IoError::Sql(format!("prepare insert failed: {e}")))?;
8361
8362 for (row_idx, row_values) in rows.iter().enumerate() {
8363 let params = row_values
8364 .iter()
8365 .map(sql_value_from_scalar)
8366 .collect::<Vec<_>>();
8367 stmt.execute(rusqlite::params_from_iter(params.iter()))
8368 .map_err(|e| IoError::Sql(format!("insert row {row_idx} failed: {e}")))?;
8369 }
8370 }
8371
8372 tx.commit()
8373 .map_err(|e| IoError::Sql(format!("commit failed: {e}")))?;
8374 Ok(())
8375 }
8376
    /// SQLite column type for `dtype`; delegates to `dtype_to_sql`.
    fn dtype_sql(&self, dtype: DType) -> &'static str {
        dtype_to_sql(dtype)
    }

    /// SQLite column type for the labels of `index`; delegates to
    /// `sql_dtype_from_index`.
    fn index_dtype_sql(&self, index: &Index) -> &'static str {
        sql_dtype_from_index(index)
    }

    /// Dialect identifier for this backend.
    fn dialect_name(&self) -> &'static str {
        "sqlite"
    }

    /// Reports `RETURNING` support.
    ///
    /// NOTE(review): SQLite only supports `RETURNING` from 3.35 on; confirm
    /// the minimum linked SQLite version.
    fn supports_returning(&self) -> bool {
        true
    }

    /// Maximum number of bind parameters per statement.
    ///
    /// NOTE(review): presumably SQLite's default `SQLITE_MAX_VARIABLE_NUMBER`
    /// (32766 since 3.32); confirm against the linked SQLite build.
    fn max_param_count(&self) -> Option<usize> {
        Some(32766)
    }
8403
    /// Runs `f` between `BEGIN` and `COMMIT`, rolling back on failure.
    ///
    /// * `f` returns `Ok`: the transaction is committed and the value returned.
    /// * `f` returns `Err`: a `ROLLBACK` is issued and the original error is
    ///   returned.
    /// * `COMMIT` fails, or `f` unwinds: the drop guard issues `ROLLBACK`.
    ///
    /// # Errors
    ///
    /// Returns [`IoError::Sql`] when `BEGIN` or `COMMIT` fails; otherwise
    /// propagates the error produced by `f`.
    fn with_transaction<T, F>(&self, f: F) -> Result<T, IoError>
    where
        F: FnOnce(&Self) -> Result<T, IoError>,
        Self: Sized,
    {
        // Guard that issues ROLLBACK on drop while `active` is still set, so
        // early returns and unwinding do not leave the transaction open.
        struct RollbackOnDrop<'conn> {
            conn: &'conn rusqlite::Connection,
            active: bool,
        }

        impl Drop for RollbackOnDrop<'_> {
            fn drop(&mut self) {
                if self.active {
                    // Best effort: an error cannot be reported from `drop`.
                    let _ = rusqlite::Connection::execute_batch(self.conn, "ROLLBACK");
                }
            }
        }

        self.execute_batch("BEGIN")
            .map_err(|e| IoError::Sql(format!("begin transaction failed: {e}")))?;
        // Armed only after BEGIN succeeded; disarmed once the outcome is settled.
        let mut rollback = RollbackOnDrop {
            conn: self,
            active: true,
        };
        match f(self) {
            Ok(result) => {
                // If COMMIT fails, `?` returns with the guard still armed,
                // so the guard rolls back.
                self.execute_batch("COMMIT")
                    .map_err(|e| IoError::Sql(format!("commit transaction failed: {e}")))?;
                rollback.active = false;
                Ok(result)
            }
            Err(err) => {
                // Roll back now; if that fails the guard retries on drop.
                if self.execute_batch("ROLLBACK").is_ok() {
                    rollback.active = false;
                }
                Err(err)
            }
        }
    }
8453
    /// Quotes `ident` using the module's `quote_sql_ident` helper instead of
    /// the trait's default implementation.
    fn quote_identifier(&self, ident: &str) -> Result<String, IoError> {
        quote_sql_ident(ident)
    }
8461
8462 fn list_tables(&self, _schema: Option<&str>) -> Result<Vec<String>, IoError> {
8463 let mut stmt = self
8473 .prepare(
8474 r"SELECT name FROM sqlite_master
8475 WHERE type='table' AND name NOT LIKE 'sqlite\_%' ESCAPE '\'
8476 ORDER BY name",
8477 )
8478 .map_err(|e| IoError::Sql(format!("list_tables prepare failed: {e}")))?;
8479 let names = stmt
8480 .query_map([], |row| row.get::<_, String>(0))
8481 .map_err(|e| IoError::Sql(format!("list_tables query failed: {e}")))?
8482 .collect::<Result<Vec<_>, _>>()
8483 .map_err(|e| IoError::Sql(format!("list_tables row read failed: {e}")))?;
8484 Ok(names)
8485 }
8486
8487 fn list_views(&self, _schema: Option<&str>) -> Result<Vec<String>, IoError> {
8488 let mut stmt = self
8493 .prepare(
8494 r"SELECT name FROM sqlite_master
8495 WHERE type='view' AND name NOT LIKE 'sqlite\_%' ESCAPE '\'
8496 ORDER BY name",
8497 )
8498 .map_err(|e| IoError::Sql(format!("list_views prepare failed: {e}")))?;
8499 let names = stmt
8500 .query_map([], |row| row.get::<_, String>(0))
8501 .map_err(|e| IoError::Sql(format!("list_views query failed: {e}")))?
8502 .collect::<Result<Vec<_>, _>>()
8503 .map_err(|e| IoError::Sql(format!("list_views row read failed: {e}")))?;
8504 Ok(names)
8505 }
8506
8507 fn table_schema(
8508 &self,
8509 table_name: &str,
8510 _schema: Option<&str>,
8511 ) -> Result<Option<SqlTableSchema>, IoError> {
8512 validate_sql_table_name(table_name)?;
8516 let pragma = format!("PRAGMA table_info(\"{}\")", table_name.replace('"', "\"\""));
8518 let mut stmt = self
8519 .prepare(&pragma)
8520 .map_err(|e| IoError::Sql(format!("table_schema prepare failed: {e}")))?;
8521 type ColumnInfoRow = (String, Option<String>, i64, Option<String>, i64);
8525 let raw_rows: Vec<ColumnInfoRow> = stmt
8526 .query_map([], |row| {
8527 Ok((
8528 row.get::<_, String>(1)?,
8529 row.get::<_, Option<String>>(2)?,
8530 row.get::<_, i64>(3)?,
8531 row.get::<_, Option<String>>(4)?,
8532 row.get::<_, i64>(5)?,
8533 ))
8534 })
8535 .map_err(|e| IoError::Sql(format!("table_schema query failed: {e}")))?
8536 .collect::<Result<Vec<_>, _>>()
8537 .map_err(|e| IoError::Sql(format!("table_schema row read failed: {e}")))?;
8538
8539 let pk_count = raw_rows.iter().filter(|(_, _, _, _, pk)| *pk > 0).count();
8546 let single_pk = pk_count == 1;
8547
8548 let mut columns: Vec<SqlColumnSchema> = Vec::with_capacity(raw_rows.len());
8549 for (name, declared, notnull, dflt, pk) in raw_rows {
8550 let cleaned_type = declared.filter(|s| !s.is_empty());
8551 let autoincrement = single_pk
8552 && pk == 1
8553 && cleaned_type
8554 .as_deref()
8555 .map(|t| t.eq_ignore_ascii_case("INTEGER"))
8556 .unwrap_or(false);
8557 columns.push(SqlColumnSchema {
8558 name,
8559 declared_type: cleaned_type,
8560 nullable: notnull == 0,
8561 default_value: dflt,
8562 primary_key_ordinal: if pk > 0 {
8563 Some(usize::try_from(pk - 1).unwrap_or(0))
8564 } else {
8565 None
8566 },
8567 comment: None,
8568 autoincrement,
8569 });
8570 }
8571 if columns.is_empty() {
8572 Ok(None)
8576 } else {
8577 Ok(Some(SqlTableSchema {
8578 table_name: table_name.to_owned(),
8579 columns,
8580 }))
8581 }
8582 }
8583
8584 fn server_version(&self) -> Result<Option<String>, IoError> {
8585 let version: String = self
8588 .query_row("SELECT sqlite_version()", [], |row| row.get(0))
8589 .map_err(|e| IoError::Sql(format!("server_version query failed: {e}")))?;
8590 Ok(Some(version))
8591 }
8592
8593 fn list_indexes(
8594 &self,
8595 table_name: &str,
8596 _schema: Option<&str>,
8597 ) -> Result<Vec<SqlIndexSchema>, IoError> {
8598 validate_sql_table_name(table_name)?;
8599 let pragma_list = format!("PRAGMA index_list(\"{}\")", table_name.replace('"', "\"\""));
8604 let mut list_stmt = self
8605 .prepare(&pragma_list)
8606 .map_err(|e| IoError::Sql(format!("list_indexes prepare failed: {e}")))?;
8607 let index_meta = list_stmt
8608 .query_map([], |row| {
8609 Ok((
8610 row.get::<_, String>(1)?, row.get::<_, i64>(2)?, row.get::<_, String>(3)?, ))
8614 })
8615 .map_err(|e| IoError::Sql(format!("list_indexes query failed: {e}")))?
8616 .collect::<Result<Vec<_>, _>>()
8617 .map_err(|e| IoError::Sql(format!("list_indexes row read failed: {e}")))?;
8618
8619 let mut indexes = Vec::new();
8620 for (name, uniq, origin) in index_meta {
8621 if origin == "pk" {
8622 continue;
8624 }
8625 if origin == "u" {
8626 continue;
8632 }
8633 let pragma_info = format!("PRAGMA index_info(\"{}\")", name.replace('"', "\"\""));
8637 let mut info_stmt = self
8638 .prepare(&pragma_info)
8639 .map_err(|e| IoError::Sql(format!("index_info prepare failed: {e}")))?;
8640 let cols = info_stmt
8641 .query_map([], |row| {
8642 Ok((row.get::<_, i64>(0)?, row.get::<_, Option<String>>(2)?))
8643 })
8644 .map_err(|e| IoError::Sql(format!("index_info query failed: {e}")))?
8645 .collect::<Result<Vec<_>, _>>()
8646 .map_err(|e| IoError::Sql(format!("index_info row read failed: {e}")))?;
8647 if cols.iter().any(|(_, c)| c.is_none()) {
8649 continue;
8650 }
8651 let mut sorted: Vec<(i64, String)> = cols
8652 .into_iter()
8653 .map(|(seq, c)| (seq, c.unwrap_or_default()))
8654 .collect();
8655 sorted.sort_by_key(|(seq, _)| *seq);
8656 indexes.push(SqlIndexSchema {
8657 name,
8658 columns: sorted.into_iter().map(|(_, c)| c).collect(),
8659 unique: uniq != 0,
8660 });
8661 }
8662 Ok(indexes)
8663 }
8664
8665 fn list_unique_constraints(
8666 &self,
8667 table_name: &str,
8668 _schema: Option<&str>,
8669 ) -> Result<Vec<SqlUniqueConstraintSchema>, IoError> {
8670 validate_sql_table_name(table_name)?;
8671 let pragma_list = format!("PRAGMA index_list(\"{}\")", table_name.replace('"', "\"\""));
8676 let mut list_stmt = self
8677 .prepare(&pragma_list)
8678 .map_err(|e| IoError::Sql(format!("list_unique_constraints prepare failed: {e}")))?;
8679 let candidates = list_stmt
8680 .query_map([], |row| {
8681 Ok((
8682 row.get::<_, String>(1)?, row.get::<_, String>(3)?, ))
8685 })
8686 .map_err(|e| IoError::Sql(format!("list_unique_constraints query failed: {e}")))?
8687 .collect::<Result<Vec<_>, _>>()
8688 .map_err(|e| IoError::Sql(format!("list_unique_constraints row read failed: {e}")))?;
8689
8690 let mut constraints = Vec::new();
8691 for (name, origin) in candidates {
8692 if origin != "u" {
8693 continue;
8694 }
8695 let pragma_info = format!("PRAGMA index_info(\"{}\")", name.replace('"', "\"\""));
8696 let mut info_stmt = self
8697 .prepare(&pragma_info)
8698 .map_err(|e| IoError::Sql(format!("uq index_info prepare failed: {e}")))?;
8699 let cols = info_stmt
8700 .query_map([], |row| {
8701 Ok((row.get::<_, i64>(0)?, row.get::<_, Option<String>>(2)?))
8702 })
8703 .map_err(|e| IoError::Sql(format!("uq index_info query failed: {e}")))?
8704 .collect::<Result<Vec<_>, _>>()
8705 .map_err(|e| IoError::Sql(format!("uq index_info row read failed: {e}")))?;
8706 if cols.iter().any(|(_, c)| c.is_none()) {
8708 continue;
8709 }
8710 let mut sorted: Vec<(i64, String)> = cols
8711 .into_iter()
8712 .map(|(seq, c)| (seq, c.unwrap_or_default()))
8713 .collect();
8714 sorted.sort_by_key(|(seq, _)| *seq);
8715 constraints.push(SqlUniqueConstraintSchema {
8716 name,
8717 columns: sorted.into_iter().map(|(_, c)| c).collect(),
8718 });
8719 }
8720 Ok(constraints)
8721 }
8722
    /// Lists the foreign keys of `table_name` via `PRAGMA foreign_key_list`.
    ///
    /// Pragma rows are grouped by their id, the groups are kept in
    /// first-seen order, and the members of each group are ordered by `seq`.
    ///
    /// Referenced columns are resolved as follows:
    /// * every row names its target column: those names are used;
    /// * no row names one: the referenced table's primary key is used, but
    ///   only if it has as many columns as the foreign key, otherwise the
    ///   key is skipped;
    /// * some rows name a target and others do not: the key is skipped.
    ///
    /// `constraint_name` is always `None`, and `_schema` is ignored.
    fn list_foreign_keys(
        &self,
        table_name: &str,
        _schema: Option<&str>,
    ) -> Result<Vec<SqlForeignKeySchema>, IoError> {
        // (seq, referenced table, local column, referenced column if named)
        type FkRow = (i64, String, String, Option<String>);

        validate_sql_table_name(table_name)?;
        let pragma = format!(
            "PRAGMA foreign_key_list(\"{}\")",
            table_name.replace('"', "\"\"")
        );
        let mut stmt = self
            .prepare(&pragma)
            .map_err(|e| IoError::Sql(format!("list_foreign_keys prepare failed: {e}")))?;
        let rows: Vec<(i64, FkRow)> = stmt
            .query_map([], |row| {
                Ok((
                    // Pragma column 0: foreign-key id (grouping key).
                    row.get::<_, i64>(0)?,
                    (
                        // Pragma columns 1-4, destructured below as
                        // (seq, ref_table, from_col, to_col).
                        row.get::<_, i64>(1)?,
                        row.get::<_, String>(2)?,
                        row.get::<_, String>(3)?,
                        row.get::<_, Option<String>>(4)?,
                    ),
                ))
            })
            .map_err(|e| IoError::Sql(format!("list_foreign_keys query failed: {e}")))?
            .collect::<Result<Vec<_>, _>>()
            .map_err(|e| IoError::Sql(format!("list_foreign_keys row read failed: {e}")))?;

        // `order` remembers the first-seen order of ids; the map alone would
        // iterate in ascending id order.
        let mut order: Vec<i64> = Vec::new();
        let mut grouped: std::collections::BTreeMap<i64, Vec<FkRow>> =
            std::collections::BTreeMap::new();
        for (id, fk_row) in rows {
            let (seq, ref_table, from_col, to_col) = fk_row;
            if !grouped.contains_key(&id) {
                order.push(id);
            }
            grouped
                .entry(id)
                .or_default()
                .push((seq, ref_table, from_col, to_col));
        }

        let mut fks = Vec::with_capacity(order.len());
        for id in order {
            let mut group = grouped.remove(&id).unwrap_or_default();
            // Order the members of a composite key by their position.
            group.sort_by_key(|(seq, _, _, _)| *seq);
            // The referenced table is taken from the group's first row.
            let ref_table = group
                .first()
                .map(|(_, t, _, _)| t.clone())
                .unwrap_or_default();
            let mut columns = Vec::with_capacity(group.len());
            let mut referenced_columns: Vec<Option<String>> = Vec::with_capacity(group.len());
            for (_, _, from_col, to_col) in group {
                columns.push(from_col);
                referenced_columns.push(to_col);
            }
            let resolved_columns: Vec<String> = if referenced_columns.iter().all(Option::is_none) {
                // No explicit targets: fall back to the referenced table's
                // primary key.
                let pk = self.primary_key_columns(&ref_table, None)?;
                if pk.len() == columns.len() {
                    pk
                } else {
                    // Arity mismatch: the targets cannot be resolved, so skip.
                    continue;
                }
            } else if referenced_columns.iter().all(Option::is_some) {
                referenced_columns.into_iter().flatten().collect()
            } else {
                // Mixed named/unnamed targets: skip the key.
                continue;
            };
            fks.push(SqlForeignKeySchema {
                constraint_name: None,
                columns,
                referenced_table: ref_table,
                referenced_columns: resolved_columns,
            });
        }
        Ok(fks)
    }
8828}
8829
/// Picks the SQLite column type for an index from its first decisive label.
///
/// Integer and non-NaT timedelta labels give `INTEGER`; string and non-NaT
/// datetime labels give `TEXT`. Labels that decide nothing are skipped, and
/// `TEXT` is the fallback when no label decides.
#[cfg(feature = "sql-sqlite")]
fn sql_dtype_from_index(index: &Index) -> &'static str {
    for label in index.labels() {
        let decided = match label {
            IndexLabel::Int64(_) => Some("INTEGER"),
            IndexLabel::Utf8(_) => Some("TEXT"),
            IndexLabel::Timedelta64(v) if *v != Timedelta::NAT => Some("INTEGER"),
            IndexLabel::Datetime64(v) if *v != i64::MIN => Some("TEXT"),
            _ => None,
        };
        if let Some(sql_type) = decided {
            return sql_type;
        }
    }
    "TEXT"
}
8843
8844fn resolve_sql_index_label(
8845 frame: &DataFrame,
8846 options: &SqlWriteOptions,
8847) -> Result<Option<String>, IoError> {
8848 if !options.index {
8849 return Ok(None);
8850 }
8851
8852 let label = options
8853 .index_label
8854 .clone()
8855 .or_else(|| frame.index().name().map(str::to_owned))
8856 .unwrap_or_else(|| "index".to_owned());
8857
8858 if frame.column(&label).is_some() {
8859 return Err(IoError::DuplicateColumnName(label));
8860 }
8861
8862 Ok(Some(label))
8863}
8864
/// Escapes `name` for use inside a double-quoted SQL identifier by doubling
/// every embedded `"`.
///
/// # Errors
///
/// Returns [`IoError::Sql`] when `name` contains a NUL character.
#[cfg(feature = "sql-sqlite")]
fn escape_sql_ident(name: &str) -> Result<String, IoError> {
    let has_nul = name.chars().any(|c| c == '\0');
    if has_nul {
        Err(IoError::Sql("invalid SQL identifier: NUL byte".to_owned()))
    } else {
        Ok(name.replace('"', "\"\""))
    }
}
8876
/// Wraps `name` in double quotes after escaping it with [`escape_sql_ident`].
///
/// # Errors
///
/// Propagates the error from [`escape_sql_ident`].
#[cfg(feature = "sql-sqlite")]
fn quote_sql_ident(name: &str) -> Result<String, IoError> {
    escape_sql_ident(name).map(|escaped| format!("\"{escaped}\""))
}
8881
8882fn validate_sql_ident(name: &str, kind: &str) -> Result<(), IoError> {
8890 if name.is_empty() || !name.chars().all(|c| c.is_alphanumeric() || c == '_') {
8891 return Err(IoError::Sql(format!(
8892 "invalid {kind} name: '{name}' (must be non-empty, only alphanumeric and underscore allowed)"
8893 )));
8894 }
8895 Ok(())
8896}
8897
8898fn validate_sql_table_name(table_name: &str) -> Result<(), IoError> {
8899 validate_sql_ident(table_name, "table")
8900}
8901
8902fn validate_sql_schema_name(schema: &str) -> Result<(), IoError> {
8907 validate_sql_ident(schema, "schema")
8908}
8909
8910fn validate_sql_identifier_length(
8919 name: &str,
8920 max: Option<usize>,
8921 kind: &str,
8922) -> Result<(), IoError> {
8923 if let Some(limit) = max
8924 && name.len() > limit
8925 {
8926 return Err(IoError::Sql(format!(
8927 "invalid {kind} name '{name}': length {len} exceeds backend identifier limit ({limit})",
8928 len = name.len()
8929 )));
8930 }
8931 Ok(())
8932}
8933
8934fn validate_sql_table_ref_identifier_lengths<C: SqlConnection + ?Sized>(
8935 conn: &C,
8936 table_name: &str,
8937 schema: Option<&str>,
8938) -> Result<(), IoError> {
8939 let max = conn.max_identifier_length();
8940 validate_sql_identifier_length(table_name, max, "table")?;
8941 if let Some(s) = schema {
8942 validate_sql_identifier_length(s, max, "schema")?;
8943 }
8944 Ok(())
8945}
8946
8947fn validate_sql_column_identifier_lengths<C, I, S>(conn: &C, names: I) -> Result<(), IoError>
8948where
8949 C: SqlConnection + ?Sized,
8950 I: IntoIterator<Item = S>,
8951 S: AsRef<str>,
8952{
8953 let max = conn.max_identifier_length();
8954 for name in names {
8955 validate_sql_identifier_length(name.as_ref(), max, "column")?;
8956 }
8957 Ok(())
8958}
8959
8960fn sql_select_all_query<C: SqlConnection>(conn: &C, table_name: &str) -> Result<String, IoError> {
8961 sql_select_all_query_in_schema(conn, table_name, None)
8962}
8963
8964fn sql_select_all_query_in_schema<C: SqlConnection>(
8972 conn: &C,
8973 table_name: &str,
8974 schema: Option<&str>,
8975) -> Result<String, IoError> {
8976 validate_sql_table_name(table_name)?;
8977 validate_sql_table_ref_identifier_lengths(conn, table_name, schema)?;
8978 let qualified = match schema {
8979 Some(s) => {
8980 validate_sql_schema_name(s)?;
8981 if !conn.supports_schemas() {
8982 return Err(IoError::Sql(format!(
8983 "read_sql_table: schema is not supported by {} backend",
8984 conn.dialect_name()
8985 )));
8986 }
8987 format!(
8988 "{}.{}",
8989 conn.quote_identifier(s)?,
8990 conn.quote_identifier(table_name)?
8991 )
8992 }
8993 _ => conn.quote_identifier(table_name)?,
8994 };
8995 Ok(format!("SELECT * FROM {qualified}"))
8996}
8997
8998fn validate_sql_column_name(column_name: &str) -> Result<(), IoError> {
8999 validate_sql_ident(column_name, "column")
9000}
9001
9002fn sql_select_columns_query<C: SqlConnection>(
9003 conn: &C,
9004 table_name: &str,
9005 columns: &[&str],
9006) -> Result<String, IoError> {
9007 sql_select_columns_query_in_schema(conn, table_name, None, columns)
9008}
9009
9010fn sql_select_columns_query_in_schema<C: SqlConnection>(
9019 conn: &C,
9020 table_name: &str,
9021 schema: Option<&str>,
9022 columns: &[&str],
9023) -> Result<String, IoError> {
9024 validate_sql_table_name(table_name)?;
9025 if columns.is_empty() {
9026 return Err(IoError::Sql(
9027 "read_sql_table_columns: columns must be non-empty".to_owned(),
9028 ));
9029 }
9030 for name in columns {
9031 validate_sql_column_name(name)?;
9032 }
9033 validate_sql_table_ref_identifier_lengths(conn, table_name, schema)?;
9034 validate_sql_column_identifier_lengths(conn, columns)?;
9035
9036 let qualified = match schema {
9037 Some(s) => {
9038 validate_sql_schema_name(s)?;
9039 if !conn.supports_schemas() {
9040 return Err(IoError::Sql(format!(
9041 "read_sql_table: schema is not supported by {} backend",
9042 conn.dialect_name()
9043 )));
9044 }
9045 format!(
9046 "{}.{}",
9047 conn.quote_identifier(s)?,
9048 conn.quote_identifier(table_name)?
9049 )
9050 }
9051 _ => conn.quote_identifier(table_name)?,
9052 };
9053 let projection: Vec<String> = columns
9054 .iter()
9055 .map(|name| conn.quote_identifier(name))
9056 .collect::<Result<_, _>>()?;
9057 Ok(format!(
9058 "SELECT {} FROM {}",
9059 projection.join(", "),
9060 qualified
9061 ))
9062}
9063
9064fn sql_column_definition<C: SqlConnection>(
9065 conn: &C,
9066 column_name: &str,
9067 sql_type: &str,
9068) -> Result<String, IoError> {
9069 Ok(format!(
9070 "{} {sql_type}",
9071 conn.quote_identifier(column_name)?
9072 ))
9073}
9074
9075#[cfg(any(feature = "sql-postgresql", feature = "sql-mysql"))]
9080use std::cell::RefCell;
9081
/// [`SqlConnection`] backend wrapping a synchronous `postgres::Client`.
///
/// The client sits in a [`RefCell`] because the trait methods take `&self`
/// while the client's query methods are called through `borrow_mut()`.
#[cfg(feature = "sql-postgresql")]
pub struct PostgresConnection {
    /// Underlying driver client, mutably borrowed for each call.
    client: RefCell<postgres::Client>,
}
9088
#[cfg(feature = "sql-postgresql")]
impl PostgresConnection {
    /// Wraps an already-connected `postgres::Client`.
    pub fn new(client: postgres::Client) -> Self {
        let client = RefCell::new(client);
        Self { client }
    }
}
9097
/// Boxes a [`Scalar`] as a PostgreSQL bind parameter.
///
/// `Bool`, `Int64`, `Float64` and `Utf8` are bound by value. Every other
/// scalar, including explicit nulls, is bound as a `None` of type
/// `Option<i64>`.
// NOTE(review): the NULL is typed as `Option<i64>`; confirm the driver accepts
// that for parameters whose server-side type is not BIGINT.
#[cfg(feature = "sql-postgresql")]
fn pg_scalar_to_param(value: &Scalar) -> Box<dyn postgres::types::ToSql + Sync> {
    match value {
        Scalar::Bool(b) => Box::new(*b),
        Scalar::Int64(i) => Box::new(*i),
        Scalar::Float64(f) => Box::new(*f),
        Scalar::Utf8(s) => Box::new(s.clone()),
        _ => Box::new(Option::<i64>::None),
    }
}

#[cfg(feature = "sql-postgresql")]
impl SqlConnection for PostgresConnection {
    /// Runs `query_str` with `params` bound positionally and materializes
    /// every row.
    ///
    /// Column names are taken from the first returned row, so a query that
    /// returns no rows yields an empty `columns` list as well.
    ///
    /// # Errors
    ///
    /// Returns [`IoError::Sql`] when the driver reports a failure.
    fn query(&self, query_str: &str, params: &[Scalar]) -> Result<SqlQueryResult, IoError> {
        use postgres::types::ToSql;

        let pg_params: Vec<Box<dyn ToSql + Sync>> =
            params.iter().map(pg_scalar_to_param).collect();
        let param_refs: Vec<&(dyn ToSql + Sync)> = pg_params.iter().map(|b| b.as_ref()).collect();
        let rows = self
            .client
            .borrow_mut()
            .query(query_str, &param_refs)
            .map_err(|e| IoError::Sql(format!("PostgreSQL query failed: {e}")))?;

        let Some(first) = rows.first() else {
            return Ok(SqlQueryResult {
                columns: Vec::new(),
                rows: Vec::new(),
            });
        };

        let columns: Vec<String> = first
            .columns()
            .iter()
            .map(|c| c.name().to_owned())
            .collect();

        let out_rows: Vec<Vec<Scalar>> = rows
            .iter()
            .map(|row| {
                (0..row.len())
                    .map(|idx| pg_value_to_scalar(row, idx))
                    .collect()
            })
            .collect();

        Ok(SqlQueryResult {
            columns,
            rows: out_rows,
        })
    }

    /// Executes `sql` through the driver's `batch_execute`.
    fn execute_batch(&self, sql: &str) -> Result<(), IoError> {
        self.client
            .borrow_mut()
            .batch_execute(sql)
            .map_err(|e| IoError::Sql(format!("PostgreSQL batch execute failed: {e}")))
    }

    /// Reports whether `information_schema.tables` lists a table with this
    /// name. The lookup does not filter by schema.
    fn table_exists(&self, table_name: &str) -> Result<bool, IoError> {
        let rows = self
            .client
            .borrow_mut()
            .query(
                "SELECT 1 FROM information_schema.tables WHERE table_name = $1 LIMIT 1",
                &[&table_name],
            )
            .map_err(|e| IoError::Sql(format!("PostgreSQL table_exists failed: {e}")))?;
        Ok(!rows.is_empty())
    }

    /// Executes `insert_sql` once per row, binding the row's scalars
    /// positionally. Stops at the first failing row.
    fn insert_rows(&self, insert_sql: &str, rows: &[Vec<Scalar>]) -> Result<(), IoError> {
        use postgres::types::ToSql;

        let mut client = self.client.borrow_mut();
        for row in rows {
            let pg_params: Vec<Box<dyn ToSql + Sync>> =
                row.iter().map(pg_scalar_to_param).collect();
            let param_refs: Vec<&(dyn ToSql + Sync)> =
                pg_params.iter().map(|b| b.as_ref()).collect();
            client
                .execute(insert_sql, &param_refs)
                .map_err(|e| IoError::Sql(format!("PostgreSQL insert failed: {e}")))?;
        }
        Ok(())
    }

    /// Maps a frame dtype to the PostgreSQL column type used in DDL; dtypes
    /// without a dedicated mapping become `TEXT`.
    fn dtype_sql(&self, dtype: DType) -> &'static str {
        match dtype {
            DType::Bool | DType::BoolNullable => "BOOLEAN",
            DType::Int64 | DType::Int64Nullable => "BIGINT",
            DType::Float64 => "DOUBLE PRECISION",
            DType::Utf8 => "TEXT",
            DType::Datetime64 => "TIMESTAMP",
            DType::Timedelta64 => "INTERVAL",
            _ => "TEXT",
        }
    }

    /// Column type for the index, decided by [`pg_sql_dtype_from_index`].
    fn index_dtype_sql(&self, index: &Index) -> &'static str {
        pg_sql_dtype_from_index(index)
    }

    fn dialect_name(&self) -> &'static str {
        "postgresql"
    }

    /// PostgreSQL uses numbered placeholders: `$1`, `$2`, ...
    fn parameter_marker(&self, ordinal: usize) -> String {
        format!("${ordinal}")
    }

    fn supports_returning(&self) -> bool {
        true
    }

    fn max_param_count(&self) -> Option<usize> {
        Some(65535)
    }

    fn supports_schemas(&self) -> bool {
        true
    }

    /// Double-quotes `ident`, doubling embedded `"`; rejects NUL characters.
    fn quote_identifier(&self, ident: &str) -> Result<String, IoError> {
        if ident.contains('\0') {
            return Err(IoError::Sql("invalid identifier: NUL byte".to_owned()));
        }
        Ok(format!("\"{}\"", ident.replace('"', "\"\"")))
    }

    /// Lists table names in `schema` (default `"public"`), sorted by name.
    fn list_tables(&self, schema: Option<&str>) -> Result<Vec<String>, IoError> {
        let schema = schema.unwrap_or("public");
        let rows = self
            .client
            .borrow_mut()
            .query(
                "SELECT table_name FROM information_schema.tables WHERE table_schema = $1 ORDER BY table_name",
                &[&schema],
            )
            .map_err(|e| IoError::Sql(format!("PostgreSQL list_tables failed: {e}")))?;
        Ok(rows.iter().map(|r| r.get(0)).collect())
    }

    /// Lists all schema names from `information_schema.schemata`, sorted.
    fn list_schemas(&self) -> Result<Vec<String>, IoError> {
        let rows = self
            .client
            .borrow_mut()
            .query(
                "SELECT schema_name FROM information_schema.schemata ORDER BY schema_name",
                &[],
            )
            .map_err(|e| IoError::Sql(format!("PostgreSQL list_schemas failed: {e}")))?;
        Ok(rows.iter().map(|r| r.get(0)).collect())
    }
}
9265
/// Picks the PostgreSQL column type for an index from its first decisive
/// label.
///
/// Integer labels give `BIGINT`, strings `TEXT`, non-NaT timedeltas
/// `INTERVAL`, and non-NaT datetimes `TIMESTAMP`. `TEXT` is the fallback when
/// no label decides.
#[cfg(feature = "sql-postgresql")]
fn pg_sql_dtype_from_index(index: &Index) -> &'static str {
    for label in index.labels() {
        let decided = match label {
            IndexLabel::Int64(_) => Some("BIGINT"),
            IndexLabel::Utf8(_) => Some("TEXT"),
            IndexLabel::Timedelta64(v) if *v != Timedelta::NAT => Some("INTERVAL"),
            IndexLabel::Datetime64(v) if *v != i64::MIN => Some("TIMESTAMP"),
            _ => None,
        };
        if let Some(sql_type) = decided {
            return sql_type;
        }
    }
    "TEXT"
}
9279
/// Decodes the value at column `idx` of `row` into a [`Scalar`].
///
/// The column is probed with a fixed list of Rust types in this order:
/// `bool`, `i64`, `i32`, `i16`, `f64`, `f32`, `String`. The first probe that
/// succeeds with a non-null value wins. Integers widen to `Int64`, floats to
/// `Float64`. SQL `NULL` and any column type that none of the probes can
/// read decode as a null scalar.
///
/// The `i16` probe makes SMALLINT columns decode as integers instead of
/// falling through to null.
#[cfg(feature = "sql-postgresql")]
fn pg_value_to_scalar(row: &postgres::Row, idx: usize) -> Scalar {
    /// Reads the cell as `T`, treating both SQL NULL and a type mismatch as
    /// "no value".
    fn read<'a, T>(row: &'a postgres::Row, idx: usize) -> Option<T>
    where
        T: postgres::types::FromSql<'a>,
    {
        row.try_get::<_, Option<T>>(idx).ok().flatten()
    }

    read::<bool>(row, idx)
        .map(Scalar::Bool)
        .or_else(|| read::<i64>(row, idx).map(Scalar::Int64))
        .or_else(|| read::<i32>(row, idx).map(|v| Scalar::Int64(i64::from(v))))
        .or_else(|| read::<i16>(row, idx).map(|v| Scalar::Int64(i64::from(v))))
        .or_else(|| read::<f64>(row, idx).map(Scalar::Float64))
        .or_else(|| read::<f32>(row, idx).map(|v| Scalar::Float64(f64::from(v))))
        .or_else(|| read::<String>(row, idx).map(Scalar::Utf8))
        .unwrap_or(Scalar::Null(crate::NullKind::Null))
}
9302
/// [`SqlConnection`] backend wrapping a synchronous `mysql::Conn`.
///
/// The connection sits in a [`RefCell`] because the trait methods take
/// `&self` while the driver calls go through `borrow_mut()`.
#[cfg(feature = "sql-mysql")]
pub struct MysqlConnection {
    /// Underlying driver connection, mutably borrowed for each call.
    conn: RefCell<mysql::Conn>,
}
9313
#[cfg(feature = "sql-mysql")]
impl MysqlConnection {
    /// Wraps an already-opened `mysql::Conn`.
    pub fn new(conn: mysql::Conn) -> Self {
        let conn = RefCell::new(conn);
        Self { conn }
    }
}
9322
#[cfg(feature = "sql-mysql")]
impl SqlConnection for MysqlConnection {
    /// Runs `query_str` with `params` bound positionally and materializes
    /// every row.
    ///
    /// Column names are taken from the first returned row, so a query that
    /// returns no rows yields an empty `columns` list as well.
    fn query(&self, query_str: &str, params: &[Scalar]) -> Result<SqlQueryResult, IoError> {
        use mysql::prelude::*;

        let bound: Vec<mysql::Value> = params.iter().map(scalar_to_mysql_value).collect();
        let fetched: Vec<mysql::Row> = self
            .conn
            .borrow_mut()
            .exec(query_str, bound)
            .map_err(|e| IoError::Sql(format!("MySQL query failed: {e}")))?;

        let Some(first) = fetched.first() else {
            return Ok(SqlQueryResult {
                columns: Vec::new(),
                rows: Vec::new(),
            });
        };

        let columns: Vec<String> = first
            .columns_ref()
            .iter()
            .map(|c| c.name_str().to_string())
            .collect();

        let rows: Vec<Vec<Scalar>> = fetched
            .iter()
            .map(|row| {
                (0..row.len())
                    .map(|idx| mysql_value_to_scalar(row.get(idx)))
                    .collect()
            })
            .collect();

        Ok(SqlQueryResult { columns, rows })
    }

    /// Splits `sql` on `;` and runs each non-blank piece as its own
    /// statement, stopping at the first failure.
    // NOTE(review): the split is purely textual, so a `;` inside a string
    // literal or comment also splits the statement.
    fn execute_batch(&self, sql: &str) -> Result<(), IoError> {
        use mysql::prelude::*;

        let mut conn = self.conn.borrow_mut();
        for statement in sql.split(';') {
            if statement.trim().is_empty() {
                continue;
            }
            conn.query_drop(statement)
                .map_err(|e| IoError::Sql(format!("MySQL execute failed: {e}")))?;
        }
        Ok(())
    }

    /// Reports whether `information_schema.tables` lists a table with this
    /// name. The lookup does not filter by schema.
    fn table_exists(&self, table_name: &str) -> Result<bool, IoError> {
        use mysql::prelude::*;

        let found: Option<(i32,)> = self
            .conn
            .borrow_mut()
            .exec_first(
                "SELECT 1 FROM information_schema.tables WHERE table_name = ? LIMIT 1",
                (table_name,),
            )
            .map_err(|e| IoError::Sql(format!("MySQL table_exists failed: {e}")))?;
        Ok(found.is_some())
    }

    /// Executes `insert_sql` once per row, binding the row's scalars
    /// positionally. Stops at the first failing row.
    fn insert_rows(&self, insert_sql: &str, rows: &[Vec<Scalar>]) -> Result<(), IoError> {
        use mysql::prelude::*;

        let mut conn = self.conn.borrow_mut();
        rows.iter().try_for_each(|row| {
            let bound: Vec<mysql::Value> = row.iter().map(scalar_to_mysql_value).collect();
            conn.exec_drop(insert_sql, bound)
                .map_err(|e| IoError::Sql(format!("MySQL insert failed: {e}")))
        })
    }

    /// Maps a frame dtype to the MySQL column type used in DDL; dtypes
    /// without a dedicated mapping become `TEXT`.
    fn dtype_sql(&self, dtype: DType) -> &'static str {
        match dtype {
            DType::Bool | DType::BoolNullable => "TINYINT(1)",
            DType::Int64 | DType::Int64Nullable => "BIGINT",
            DType::Float64 => "DOUBLE",
            DType::Datetime64 => "DATETIME",
            DType::Timedelta64 => "TIME",
            DType::Utf8 => "TEXT",
            _ => "TEXT",
        }
    }

    /// Column type for the index, decided by [`mysql_sql_dtype_from_index`].
    fn index_dtype_sql(&self, index: &Index) -> &'static str {
        mysql_sql_dtype_from_index(index)
    }

    fn dialect_name(&self) -> &'static str {
        "mysql"
    }

    /// MySQL placeholders are positional `?`; the ordinal is not used.
    fn parameter_marker(&self, _ordinal: usize) -> String {
        String::from("?")
    }

    fn supports_returning(&self) -> bool {
        false
    }

    fn max_param_count(&self) -> Option<usize> {
        Some(65535)
    }

    fn max_identifier_length(&self) -> Option<usize> {
        Some(64)
    }

    /// Backtick-quotes `ident`, doubling embedded backticks; rejects NUL
    /// characters.
    fn quote_identifier(&self, ident: &str) -> Result<String, IoError> {
        if ident.contains('\0') {
            return Err(IoError::Sql("invalid identifier: NUL byte".to_owned()));
        }
        let escaped = ident.replace('`', "``");
        Ok(format!("`{escaped}`"))
    }

    /// Lists tables via `SHOW TABLES`; the `_schema` argument is ignored.
    fn list_tables(&self, _schema: Option<&str>) -> Result<Vec<String>, IoError> {
        use mysql::prelude::*;

        let listed: Vec<(String,)> = self
            .conn
            .borrow_mut()
            .query("SHOW TABLES")
            .map_err(|e| IoError::Sql(format!("MySQL list_tables failed: {e}")))?;
        Ok(listed.into_iter().map(|(name,)| name).collect())
    }
}
9451
/// Picks the MySQL column type for an index from its first decisive label.
///
/// Integer labels give `BIGINT`, strings `VARCHAR(255)`, non-NaT timedeltas
/// `TIME`, and non-NaT datetimes `DATETIME`. `VARCHAR(255)` is the fallback
/// when no label decides.
#[cfg(feature = "sql-mysql")]
fn mysql_sql_dtype_from_index(index: &Index) -> &'static str {
    for label in index.labels() {
        let decided = match label {
            IndexLabel::Int64(_) => Some("BIGINT"),
            IndexLabel::Utf8(_) => Some("VARCHAR(255)"),
            IndexLabel::Timedelta64(v) if *v != Timedelta::NAT => Some("TIME"),
            IndexLabel::Datetime64(v) if *v != i64::MIN => Some("DATETIME"),
            _ => None,
        };
        if let Some(sql_type) = decided {
            return sql_type;
        }
    }
    "VARCHAR(255)"
}
9465
/// Converts a [`Scalar`] into a MySQL bind value.
///
/// `Bool`, `Int64`, `Float64` and `Utf8` convert by value; every other
/// scalar, including explicit nulls, is bound as `NULL`.
#[cfg(feature = "sql-mysql")]
fn scalar_to_mysql_value(s: &Scalar) -> mysql::Value {
    match s {
        Scalar::Bool(flag) => mysql::Value::from(*flag),
        Scalar::Int64(int) => mysql::Value::from(*int),
        Scalar::Float64(float) => mysql::Value::from(*float),
        Scalar::Utf8(text) => mysql::Value::from(text.as_str()),
        Scalar::Null(_) => mysql::Value::NULL,
        _ => mysql::Value::NULL,
    }
}
9477
/// Converts a MySQL cell value into a [`Scalar`].
///
/// * A missing cell or `NULL` becomes a null scalar.
/// * `Bytes` is decoded as UTF-8 (lossily) into `Utf8`.
/// * `Int` becomes `Int64`. `UInt` becomes `Int64` when it fits; larger
///   values become `Float64` rather than wrapping to a negative integer.
/// * `Float` and `Double` become `Float64`.
/// * `Date` becomes `Utf8` formatted `YYYY-MM-DD HH:MM:SS`, with a
///   `.ffffff` suffix when the microsecond part is non-zero.
/// * `Time` becomes `Utf8` formatted `[-]HH:MM:SS[.ffffff]`, with the day
///   part folded into the hours.
///
/// Previously `Date` and `Time` values were silently turned into nulls.
#[cfg(feature = "sql-mysql")]
fn mysql_value_to_scalar(v: Option<mysql::Value>) -> Scalar {
    match v {
        None | Some(mysql::Value::NULL) => Scalar::Null(crate::NullKind::Null),
        Some(mysql::Value::Bytes(b)) => Scalar::Utf8(String::from_utf8_lossy(&b).into_owned()),
        Some(mysql::Value::Int(i)) => Scalar::Int64(i),
        Some(mysql::Value::UInt(u)) => match i64::try_from(u) {
            Ok(i) => Scalar::Int64(i),
            // Above i64::MAX: keep the magnitude (approximately) instead of
            // letting an `as` cast wrap it into a negative number.
            Err(_) => Scalar::Float64(u as f64),
        },
        Some(mysql::Value::Float(f)) => Scalar::Float64(f64::from(f)),
        Some(mysql::Value::Double(d)) => Scalar::Float64(d),
        Some(mysql::Value::Date(year, month, day, hour, minute, second, micros)) => {
            let mut text =
                format!("{year:04}-{month:02}-{day:02} {hour:02}:{minute:02}:{second:02}");
            if micros != 0 {
                text.push_str(&format!(".{micros:06}"));
            }
            Scalar::Utf8(text)
        }
        Some(mysql::Value::Time(negative, days, hours, minutes, seconds, micros)) => {
            let sign = if negative { "-" } else { "" };
            let total_hours = u64::from(days) * 24 + u64::from(hours);
            let mut text = format!("{sign}{total_hours:02}:{minutes:02}:{seconds:02}");
            if micros != 0 {
                text.push_str(&format!(".{micros:06}"));
            }
            Scalar::Utf8(text)
        }
    }
}
9490
/// Test-only shorthand for [`sql_create_table_query_in_schema`] with no
/// schema.
#[cfg(test)]
fn sql_create_table_query<C: SqlConnection>(
    conn: &C,
    table_name: &str,
    column_defs: &[String],
) -> Result<String, IoError> {
    let no_schema: Option<&str> = None;
    sql_create_table_query_in_schema(conn, table_name, no_schema, column_defs)
}
9499
9500fn sql_create_table_query_in_schema<C: SqlConnection>(
9507 conn: &C,
9508 table_name: &str,
9509 schema: Option<&str>,
9510 column_defs: &[String],
9511) -> Result<String, IoError> {
9512 validate_sql_table_name(table_name)?;
9513 validate_sql_table_ref_identifier_lengths(conn, table_name, schema)?;
9514 let qualified = match schema {
9515 Some(s) if conn.supports_schemas() => {
9516 validate_sql_schema_name(s)?;
9517 format!(
9518 "{}.{}",
9519 conn.quote_identifier(s)?,
9520 conn.quote_identifier(table_name)?
9521 )
9522 }
9523 _ => conn.quote_identifier(table_name)?,
9524 };
9525 Ok(format!(
9526 "CREATE TABLE IF NOT EXISTS {qualified} ({})",
9527 column_defs.join(", ")
9528 ))
9529}
9530
/// Test-only shorthand for [`sql_insert_rows_query_in_schema`] with no schema.
#[cfg(test)]
fn sql_insert_rows_query<C: SqlConnection>(
    conn: &C,
    table_name: &str,
    column_names: &[String],
) -> Result<String, IoError> {
    let no_schema: Option<&str> = None;
    sql_insert_rows_query_in_schema(conn, table_name, no_schema, column_names)
}
9539
9540fn sql_insert_rows_query_in_schema<C: SqlConnection>(
9546 conn: &C,
9547 table_name: &str,
9548 schema: Option<&str>,
9549 column_names: &[String],
9550) -> Result<String, IoError> {
9551 validate_sql_table_name(table_name)?;
9552 validate_sql_table_ref_identifier_lengths(conn, table_name, schema)?;
9553 validate_sql_column_identifier_lengths(conn, column_names.iter())?;
9554 let qualified = match schema {
9555 Some(s) if conn.supports_schemas() => {
9556 validate_sql_schema_name(s)?;
9557 format!(
9558 "{}.{}",
9559 conn.quote_identifier(s)?,
9560 conn.quote_identifier(table_name)?
9561 )
9562 }
9563 _ => conn.quote_identifier(table_name)?,
9564 };
9565 let quoted_columns = column_names
9566 .iter()
9567 .map(|name| conn.quote_identifier(name))
9568 .collect::<Result<Vec<_>, _>>()?
9569 .join(", ");
9570 let placeholders = (1..=column_names.len())
9571 .map(|ordinal| conn.parameter_marker(ordinal))
9572 .collect::<Vec<_>>()
9573 .join(", ");
9574 Ok(format!(
9575 "INSERT INTO {qualified} ({quoted_columns}) VALUES ({placeholders})"
9576 ))
9577}
9578
9579fn sql_multi_row_insert_query_in_schema<C: SqlConnection>(
9589 conn: &C,
9590 table_name: &str,
9591 schema: Option<&str>,
9592 column_names: &[String],
9593 num_rows: usize,
9594) -> Result<String, IoError> {
9595 validate_sql_table_name(table_name)?;
9596 if num_rows == 0 || column_names.is_empty() {
9597 return Err(IoError::Sql(
9598 "multi-row insert requires at least one row and one column".to_owned(),
9599 ));
9600 }
9601 validate_sql_table_ref_identifier_lengths(conn, table_name, schema)?;
9602 validate_sql_column_identifier_lengths(conn, column_names.iter())?;
9603 let qualified = match schema {
9604 Some(s) if conn.supports_schemas() => {
9605 validate_sql_schema_name(s)?;
9606 format!(
9607 "{}.{}",
9608 conn.quote_identifier(s)?,
9609 conn.quote_identifier(table_name)?
9610 )
9611 }
9612 _ => conn.quote_identifier(table_name)?,
9613 };
9614 let quoted_columns = column_names
9615 .iter()
9616 .map(|name| conn.quote_identifier(name))
9617 .collect::<Result<Vec<_>, _>>()?
9618 .join(", ");
9619 let cols = column_names.len();
9620 let mut tuples = Vec::with_capacity(num_rows);
9621 let mut next_ord = 1usize;
9622 for _ in 0..num_rows {
9623 let row_placeholders = (0..cols)
9624 .map(|_| {
9625 let marker = conn.parameter_marker(next_ord);
9626 next_ord += 1;
9627 marker
9628 })
9629 .collect::<Vec<_>>()
9630 .join(", ");
9631 tuples.push(format!("({row_placeholders})"));
9632 }
9633 Ok(format!(
9634 "INSERT INTO {qualified} ({quoted_columns}) VALUES {}",
9635 tuples.join(", ")
9636 ))
9637}
9638
9639fn sql_drop_table_query_in_schema<C: SqlConnection>(
9649 conn: &C,
9650 table_name: &str,
9651 schema: Option<&str>,
9652) -> Result<String, IoError> {
9653 validate_sql_table_name(table_name)?;
9654 validate_sql_table_ref_identifier_lengths(conn, table_name, schema)?;
9655 let qualified = match schema {
9656 Some(s) if conn.supports_schemas() => {
9657 validate_sql_schema_name(s)?;
9658 format!(
9659 "{}.{}",
9660 conn.quote_identifier(s)?,
9661 conn.quote_identifier(table_name)?
9662 )
9663 }
9664 _ => conn.quote_identifier(table_name)?,
9665 };
9666 Ok(format!("DROP TABLE IF EXISTS {qualified}"))
9667}
9668
9669pub fn read_sql<C: SqlConnection>(conn: &C, query: &str) -> Result<DataFrame, IoError> {
9673 read_sql_with_options(conn, query, &SqlReadOptions::default())
9674}
9675
9676pub fn read_sql_with_options<C: SqlConnection>(
9680 conn: &C,
9681 query: &str,
9682 options: &SqlReadOptions,
9683) -> Result<DataFrame, IoError> {
9684 if options.columns.is_some() {
9690 return Err(IoError::Sql(
9691 "options.columns is meaningful only for table readers; embed the column list in \
9692 the SELECT or use read_sql_table_with_options to generate the projection from a \
9693 table name"
9694 .to_owned(),
9695 ));
9696 }
9697 let (headers, columns, dtype_hints) = sql_query_to_columns(conn, query, options)?;
9698 let frame = dataframe_from_sql_columns(headers, columns, dtype_hints)?;
9699 apply_sql_index_col(frame, options.index_col.as_deref())
9700}
9701
9702fn apply_sql_index_col(frame: DataFrame, index_col: Option<&str>) -> Result<DataFrame, IoError> {
9705 let Some(name) = index_col else {
9706 return Ok(frame);
9707 };
9708 if name.is_empty() {
9709 return Err(IoError::Sql(
9710 "index_col: empty string is not a valid column name".to_owned(),
9711 ));
9712 }
9713 promote_column_to_index(&frame, name)
9714}
9715
9716pub fn read_sql_query<C: SqlConnection>(conn: &C, query: &str) -> Result<DataFrame, IoError> {
9721 read_sql(conn, query)
9722}
9723
9724pub fn read_sql_query_with_options<C: SqlConnection>(
9729 conn: &C,
9730 query: &str,
9731 options: &SqlReadOptions,
9732) -> Result<DataFrame, IoError> {
9733 read_sql_with_options(conn, query, options)
9734}
9735
9736pub fn read_sql_query_with_options_and_index_col<C: SqlConnection>(
9741 conn: &C,
9742 query: &str,
9743 options: &SqlReadOptions,
9744 index_col: Option<&str>,
9745) -> Result<DataFrame, IoError> {
9746 if let Some(col_name) = index_col {
9747 let cleared = SqlReadOptions {
9748 index_col: None,
9749 ..options.clone()
9750 };
9751 let frame = read_sql_query_with_options(conn, query, &cleared)?;
9752 return apply_sql_index_col(frame, Some(col_name));
9753 }
9754 read_sql_query_with_options(conn, query, options)
9755}
9756
9757pub fn read_sql_query_chunks<'conn, C: SqlConnection + 'conn>(
9761 conn: &'conn C,
9762 query: &str,
9763 chunk_size: usize,
9764) -> Result<SqlChunkIterator<'conn>, IoError> {
9765 read_sql_chunks(conn, query, chunk_size)
9766}
9767
9768pub fn read_sql_query_chunks_with_index_col<'conn, C: SqlConnection + 'conn>(
9773 conn: &'conn C,
9774 query: &str,
9775 index_col: Option<&str>,
9776 chunk_size: usize,
9777) -> Result<SqlIndexedChunkIterator<'conn>, IoError> {
9778 read_sql_chunks_with_index_col(conn, query, index_col, chunk_size)
9779}
9780
9781pub fn read_sql_query_chunks_with_options<'conn, C: SqlConnection + 'conn>(
9786 conn: &'conn C,
9787 query: &str,
9788 options: &SqlReadOptions,
9789 chunk_size: usize,
9790) -> Result<SqlChunkIterator<'conn>, IoError> {
9791 if options.index_col.is_some() {
9792 return Err(IoError::Sql(
9793 "options.index_col is set but this entrypoint returns SqlChunkIterator without \
9794 index promotion; use read_sql_query_chunks_with_options_and_index_col to honor \
9795 index_col"
9796 .to_owned(),
9797 ));
9798 }
9799 read_sql_chunks_with_options(conn, query, options, chunk_size)
9800}
9801
9802pub fn read_sql_query_chunks_with_options_and_index_col<'conn, C: SqlConnection + 'conn>(
9807 conn: &'conn C,
9808 query: &str,
9809 options: &SqlReadOptions,
9810 index_col: Option<&str>,
9811 chunk_size: usize,
9812) -> Result<SqlIndexedChunkIterator<'conn>, IoError> {
9813 read_sql_chunks_with_options_and_index_col(conn, query, options, index_col, chunk_size)
9814}
9815
9816pub fn read_sql_query_with_index_col<C: SqlConnection>(
9820 conn: &C,
9821 query: &str,
9822 index_col: Option<&str>,
9823) -> Result<DataFrame, IoError> {
9824 read_sql_with_index_col(conn, query, index_col)
9825}
9826
9827fn sql_trim_chunk_source(query: &str) -> Result<&str, IoError> {
9828 let trimmed = query.trim().trim_end_matches(';').trim();
9829 if trimmed.is_empty() {
9830 Err(IoError::Sql("read_sql query must be non-empty".to_owned()))
9831 } else {
9832 Ok(trimmed)
9833 }
9834}
9835
9836fn sql_paged_query<C: SqlConnection + ?Sized>(
9837 conn: &C,
9838 query: &str,
9839 base_param_count: usize,
9840) -> Result<String, IoError> {
9841 let source = sql_trim_chunk_source(query)?;
9842 let limit_marker = conn.parameter_marker(base_param_count + 1);
9843 let offset_marker = conn.parameter_marker(base_param_count + 2);
9844 Ok(format!(
9845 "SELECT * FROM ({source}) AS frankenpandas_sql_chunk_source \
9846 LIMIT {limit_marker} OFFSET {offset_marker}"
9847 ))
9848}
9849
9850fn sql_paged_options(
9851 options: &SqlReadOptions,
9852 limit: usize,
9853 offset: usize,
9854) -> Result<SqlReadOptions, IoError> {
9855 let limit = i64::try_from(limit)
9856 .map_err(|_| IoError::Sql("read_sql chunksize exceeds i64 range".to_owned()))?;
9857 let offset = i64::try_from(offset)
9858 .map_err(|_| IoError::Sql("read_sql chunk offset exceeds i64 range".to_owned()))?;
9859 let mut params = options.params.clone().unwrap_or_default();
9860 params.push(Scalar::Int64(limit));
9861 params.push(Scalar::Int64(offset));
9862 Ok(SqlReadOptions {
9863 params: Some(params),
9864 ..options.clone()
9865 })
9866}
9867
9868fn sql_paged_query_headers<C: SqlConnection + ?Sized>(
9869 conn: &C,
9870 query: &str,
9871 options: &SqlReadOptions,
9872) -> Result<Vec<String>, IoError> {
9873 let base_param_count = options.params.as_ref().map_or(0, Vec::len);
9874 let paged_query = sql_paged_query(conn, query, base_param_count)?;
9875 let paged_options = sql_paged_options(options, 0, 0)?;
9876 let result = conn.query(&paged_query, paged_options.params.as_deref().unwrap_or(&[]))?;
9877 reject_duplicate_headers(&result.columns)?;
9878 Ok(result.columns)
9879}
9880
9881fn sql_query_to_columns_paged<C: SqlConnection + ?Sized>(
9882 conn: &C,
9883 query: &str,
9884 options: &SqlReadOptions,
9885 chunk_size: usize,
9886 offset: usize,
9887) -> Result<SqlMaterializedColumns, IoError> {
9888 let base_param_count = options.params.as_ref().map_or(0, Vec::len);
9889 let paged_query = sql_paged_query(conn, query, base_param_count)?;
9890 let paged_options = sql_paged_options(options, chunk_size, offset)?;
9891 sql_query_to_columns(conn, &paged_query, &paged_options)
9892}
9893
9894fn sql_query_to_columns<C: SqlConnection + ?Sized>(
9895 conn: &C,
9896 query: &str,
9897 options: &SqlReadOptions,
9898) -> Result<SqlMaterializedColumns, IoError> {
9899 let params = options.params.as_deref().unwrap_or(&[]);
9900 let SqlQueryResult {
9901 columns: headers,
9902 rows,
9903 } = conn.query(query, params)?;
9904 reject_duplicate_headers(&headers)?;
9905 let mut dtype_hints = conn.query_column_dtypes(query, params)?;
9906 dtype_hints.resize(headers.len(), None);
9907 let mut columns: Vec<Vec<Scalar>> = (0..headers.len()).map(|_| Vec::new()).collect();
9908
9909 for row in rows {
9910 for (col_idx, value) in row.into_iter().enumerate() {
9911 if let Some(col_vec) = columns.get_mut(col_idx) {
9912 col_vec.push(value);
9913 }
9914 }
9915 }
9916
9917 if let Some(ref parse_dates) = options.parse_dates {
9918 apply_parse_dates(&headers, &mut columns, parse_dates)?;
9919 }
9920 if options.coerce_float {
9921 apply_sql_coerce_float(&mut columns);
9922 }
9923 if let Some(ref dtype_map) = options.dtype {
9924 apply_sql_dtype_overrides(
9925 &headers,
9926 &mut columns,
9927 dtype_map,
9928 options.parse_dates.as_deref().unwrap_or(&[]),
9929 )?;
9930 for (idx, header) in headers.iter().enumerate() {
9931 if let Some(dtype) = dtype_map.get(header)
9932 && !options
9933 .parse_dates
9934 .as_deref()
9935 .unwrap_or(&[])
9936 .iter()
9937 .any(|d| d == header)
9938 {
9939 dtype_hints[idx] = Some(*dtype);
9940 }
9941 }
9942 }
9943
9944 Ok((headers, columns, dtype_hints))
9945}
9946
9947fn apply_sql_dtype_overrides(
9951 headers: &[String],
9952 columns: &mut [Vec<Scalar>],
9953 dtype_map: &BTreeMap<String, DType>,
9954 parse_dates: &[String],
9955) -> Result<(), IoError> {
9956 for (idx, header) in headers.iter().enumerate() {
9957 let Some(target_dtype) = dtype_map.get(header) else {
9958 continue;
9959 };
9960 if parse_dates.iter().any(|d| d == header) {
9961 continue;
9963 }
9964 let Some(col) = columns.get_mut(idx) else {
9965 continue;
9966 };
9967 for value in col.iter_mut() {
9968 let taken = std::mem::replace(value, Scalar::Null(NullKind::Null));
9972 *value = cast_scalar_owned(taken, *target_dtype).map_err(|e| {
9973 IoError::Sql(format!(
9974 "dtype override on column '{header}' to {target_dtype:?} failed: {e}"
9975 ))
9976 })?;
9977 }
9978 }
9979 Ok(())
9980}
9981
9982fn dataframe_from_sql_columns(
9983 headers: Vec<String>,
9984 columns: Vec<Vec<Scalar>>,
9985 dtype_hints: SqlColumnDtypeHints,
9986) -> Result<DataFrame, IoError> {
9987 let row_count = columns.first().map_or(0, Vec::len);
9988 let mut out_columns = BTreeMap::new();
9989 let mut column_order = Vec::new();
9990
9991 for (idx, (name, values)) in headers.into_iter().zip(columns).enumerate() {
9992 let dtype_hint = dtype_hints.get(idx).copied().flatten();
9993 let has_observed_value = values.iter().any(|value| !matches!(value, Scalar::Null(_)));
9994 let column = match (has_observed_value, dtype_hint) {
9995 (false, Some(dtype)) => Column::new(dtype, values)?,
9996 _ => Column::from_values(values)?,
9997 };
9998 out_columns.insert(name.clone(), column);
9999 column_order.push(name);
10000 }
10001
10002 let index = Index::from_i64((0..row_count as i64).collect());
10003 Ok(DataFrame::new_with_column_order(
10004 index,
10005 out_columns,
10006 column_order,
10007 )?)
10008}
10009
10010pub fn read_sql_chunks<'conn, C: SqlConnection + 'conn>(
10016 conn: &'conn C,
10017 query: &str,
10018 chunk_size: usize,
10019) -> Result<SqlChunkIterator<'conn>, IoError> {
10020 read_sql_chunks_with_options(conn, query, &SqlReadOptions::default(), chunk_size)
10021}
10022
10023pub fn read_sql_chunks_with_options<'conn, C: SqlConnection + 'conn>(
10031 conn: &'conn C,
10032 query: &str,
10033 options: &SqlReadOptions,
10034 chunk_size: usize,
10035) -> Result<SqlChunkIterator<'conn>, IoError> {
10036 if chunk_size == 0 {
10037 return Err(IoError::Sql(
10038 "read_sql chunksize must be greater than zero".to_owned(),
10039 ));
10040 }
10041 if options.index_col.is_some() {
10047 return Err(IoError::Sql(
10048 "options.index_col is set but this entrypoint returns SqlChunkIterator without \
10049 index promotion; use read_sql_chunks_with_options_and_index_col to honor index_col"
10050 .to_owned(),
10051 ));
10052 }
10053 if options.columns.is_some() {
10061 return Err(IoError::Sql(
10062 "options.columns is meaningful only for table readers; embed the column list in \
10063 the SELECT or use read_sql_table_chunks_with_options to generate the projection \
10064 from a table name"
10065 .to_owned(),
10066 ));
10067 }
10068
10069 if conn.supports_paged_sql_chunks() {
10070 return SqlChunkIterator::paged(conn, query, options, chunk_size);
10071 }
10072
10073 let (headers, columns, dtype_hints) = sql_query_to_columns(conn, query, options)?;
10074 Ok(SqlChunkIterator::materialized(
10075 headers,
10076 columns,
10077 dtype_hints,
10078 chunk_size,
10079 ))
10080}
10081
10082pub fn read_sql_chunks_with_options_and_index_col<'conn, C: SqlConnection + 'conn>(
10087 conn: &'conn C,
10088 query: &str,
10089 options: &SqlReadOptions,
10090 index_col: Option<&str>,
10091 chunk_size: usize,
10092) -> Result<SqlIndexedChunkIterator<'conn>, IoError> {
10093 if options.columns.is_some() {
10098 return Err(IoError::Sql(
10099 "options.columns is meaningful only for table readers; embed the column list in \
10100 the SELECT or use read_sql_table_chunks_with_options_and_index_col to generate \
10101 the projection from a table name"
10102 .to_owned(),
10103 ));
10104 }
10105 let cleared = SqlReadOptions {
10109 index_col: None,
10110 ..options.clone()
10111 };
10112 let inner = read_sql_chunks_with_options(conn, query, &cleared, chunk_size)?;
10113 sql_indexed_chunks(inner, index_col.or(options.index_col.as_deref()))
10114}
10115
10116pub fn read_sql_chunks_with_index_col<'conn, C: SqlConnection + 'conn>(
10120 conn: &'conn C,
10121 query: &str,
10122 index_col: Option<&str>,
10123 chunk_size: usize,
10124) -> Result<SqlIndexedChunkIterator<'conn>, IoError> {
10125 let inner = read_sql_chunks(conn, query, chunk_size)?;
10126 sql_indexed_chunks(inner, index_col)
10127}
10128
10129pub fn read_sql_with_index_col<C: SqlConnection>(
10136 conn: &C,
10137 query: &str,
10138 index_col: Option<&str>,
10139) -> Result<DataFrame, IoError> {
10140 let frame = read_sql(conn, query)?;
10141 apply_sql_index_col(frame, index_col)
10142}
10143
10144pub fn read_sql_table_with_index_col<C: SqlConnection>(
10148 conn: &C,
10149 table_name: &str,
10150 index_col: Option<&str>,
10151) -> Result<DataFrame, IoError> {
10152 let frame = read_sql_table(conn, table_name)?;
10153 apply_sql_index_col(frame, index_col)
10154}
10155
10156fn promote_column_to_index(frame: &DataFrame, col_name: &str) -> Result<DataFrame, IoError> {
10157 let column = frame.column(col_name).ok_or_else(|| {
10158 IoError::Sql(format!(
10159 "index_col {col_name:?} not present in result columns"
10160 ))
10161 })?;
10162 let labels: Vec<IndexLabel> = column
10163 .values()
10164 .iter()
10165 .map(|v| match v {
10166 Scalar::Int64(i) => IndexLabel::Int64(*i),
10167 Scalar::Utf8(s) => IndexLabel::Utf8(s.clone()),
10168 Scalar::Float64(f) if !f.is_nan() => IndexLabel::Utf8(f.to_string()),
10169 Scalar::Bool(b) => IndexLabel::Utf8(if *b { "True" } else { "False" }.to_string()),
10170 Scalar::Timedelta64(ns) => IndexLabel::Timedelta64(*ns),
10171 _ => IndexLabel::Utf8("NaN".to_owned()),
10172 })
10173 .collect();
10174 let new_index = Index::new(labels).set_name(col_name);
10175
10176 let mut new_columns = std::collections::BTreeMap::new();
10177 let mut new_order = Vec::new();
10178 for name in frame.column_names() {
10179 if name == col_name {
10180 continue;
10181 }
10182 if let Some(col) = frame.column(name) {
10183 new_columns.insert(name.clone(), col.clone());
10184 new_order.push(name.clone());
10185 }
10186 }
10187
10188 Ok(DataFrame::new_with_column_order(
10189 new_index,
10190 new_columns,
10191 new_order,
10192 )?)
10193}
10194
10195pub fn read_sql_table<C: SqlConnection>(conn: &C, table_name: &str) -> Result<DataFrame, IoError> {
10199 read_sql(conn, &sql_select_all_query(conn, table_name)?)
10200}
10201
/// Free-function form of `conn.list_tables(schema)`.
///
/// `schema` is forwarded unchanged; how `None` is interpreted is decided by
/// the connection implementation.
pub fn list_sql_tables<C: SqlConnection>(
    conn: &C,
    schema: Option<&str>,
) -> Result<Vec<String>, IoError> {
    conn.list_tables(schema)
}
10221
/// Free-function form of `conn.table_schema(table_name, schema)`.
///
/// Returns the connection's answer verbatim, including `Ok(None)`.
pub fn sql_table_schema<C: SqlConnection>(
    conn: &C,
    table_name: &str,
    schema: Option<&str>,
) -> Result<Option<SqlTableSchema>, IoError> {
    conn.table_schema(table_name, schema)
}
10238
/// Free-function form of `conn.list_schemas()`.
pub fn list_sql_schemas<C: SqlConnection>(conn: &C) -> Result<Vec<String>, IoError> {
    conn.list_schemas()
}
10250
/// Free-function form of `conn.truncate_table(table_name, schema)`.
///
/// No validation is performed here; both arguments are forwarded unchanged.
pub fn truncate_sql_table<C: SqlConnection>(
    conn: &C,
    table_name: &str,
    schema: Option<&str>,
) -> Result<(), IoError> {
    conn.truncate_table(table_name, schema)
}
10267
/// Free-function form of `conn.server_version()`.
pub fn sql_server_version<C: SqlConnection>(conn: &C) -> Result<Option<String>, IoError> {
    conn.server_version()
}
10280
/// Free-function form of `conn.primary_key_columns(table_name, schema)`.
pub fn sql_primary_key_columns<C: SqlConnection>(
    conn: &C,
    table_name: &str,
    schema: Option<&str>,
) -> Result<Vec<String>, IoError> {
    conn.primary_key_columns(table_name, schema)
}
10297
/// Free-function form of `conn.list_indexes(table_name, schema)`.
pub fn list_sql_indexes<C: SqlConnection>(
    conn: &C,
    table_name: &str,
    schema: Option<&str>,
) -> Result<Vec<SqlIndexSchema>, IoError> {
    conn.list_indexes(table_name, schema)
}
10314
/// Free-function form of `conn.list_views(schema)`.
pub fn list_sql_views<C: SqlConnection>(
    conn: &C,
    schema: Option<&str>,
) -> Result<Vec<String>, IoError> {
    conn.list_views(schema)
}
10329
/// Free-function form of `conn.list_foreign_keys(table_name, schema)`.
pub fn list_sql_foreign_keys<C: SqlConnection>(
    conn: &C,
    table_name: &str,
    schema: Option<&str>,
) -> Result<Vec<SqlForeignKeySchema>, IoError> {
    conn.list_foreign_keys(table_name, schema)
}
10348
/// Free-function form of `conn.table_comment(table_name, schema)`.
pub fn sql_table_comment<C: SqlConnection>(
    conn: &C,
    table_name: &str,
    schema: Option<&str>,
) -> Result<Option<String>, IoError> {
    conn.table_comment(table_name, schema)
}
10366
/// Free-function form of `conn.list_unique_constraints(table_name, schema)`.
pub fn list_sql_unique_constraints<C: SqlConnection>(
    conn: &C,
    table_name: &str,
    schema: Option<&str>,
) -> Result<Vec<SqlUniqueConstraintSchema>, IoError> {
    conn.list_unique_constraints(table_name, schema)
}
10383
/// Free-function form of `conn.max_identifier_length()`.
pub fn sql_max_identifier_length<C: SqlConnection>(conn: &C) -> Option<usize> {
    conn.max_identifier_length()
}
10396
/// Snapshot of a connection's capability values, as filled in by
/// `sql_backend_caps`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct SqlBackendCaps {
    /// Value returned by `conn.dialect_name()`.
    pub dialect_name: &'static str,
    /// Value returned by `conn.server_version()`.
    pub server_version: Option<String>,
    /// Value returned by `conn.supports_returning()`.
    pub supports_returning: bool,
    /// Value returned by `conn.supports_schemas()`.
    pub supports_schemas: bool,
    /// Value returned by `conn.max_param_count()`.
    pub max_param_count: Option<usize>,
    /// Value returned by `conn.max_identifier_length()`.
    pub max_identifier_length: Option<usize>,
}
10411
impl SqlBackendCaps {
    /// Rows that fit in one statement for `column_count` columns, computed by
    /// `sql_max_insert_rows_for_columns` from the stored `max_param_count`.
    ///
    /// Returns `None` when `column_count` is zero or no parameter limit is
    /// recorded.
    #[must_use]
    pub fn max_insert_rows(&self, column_count: usize) -> Option<usize> {
        sql_max_insert_rows_for_columns(self.max_param_count, column_count)
    }
}
10423
/// Free-function form of `conn.max_param_count()`.
#[must_use]
pub fn sql_max_param_count<C: SqlConnection>(conn: &C) -> Option<usize> {
    conn.max_param_count()
}
10429
/// Free-function form of `conn.supports_returning()`.
#[must_use]
pub fn sql_supports_returning<C: SqlConnection>(conn: &C) -> bool {
    conn.supports_returning()
}
10435
/// Free-function form of `conn.supports_schemas()`.
#[must_use]
pub fn sql_supports_schemas<C: SqlConnection>(conn: &C) -> bool {
    conn.supports_schemas()
}
10441
/// Rows that fit in one statement for `column_count` columns, computed by
/// `sql_max_insert_rows_for_columns` from `conn.max_param_count()`.
///
/// Returns `None` when `column_count` is zero or the connection reports no
/// parameter limit.
#[must_use]
pub fn sql_max_insert_rows<C: SqlConnection>(conn: &C, column_count: usize) -> Option<usize> {
    sql_max_insert_rows_for_columns(conn.max_param_count(), column_count)
}
10451
/// Integer quotient `max_param_count / column_count`.
///
/// Returns `None` when there is no parameter limit or when `column_count` is
/// zero. The quotient may be `Some(0)` when a single row already needs more
/// parameters than the limit allows.
fn sql_max_insert_rows_for_columns(
    max_param_count: Option<usize>,
    column_count: usize,
) -> Option<usize> {
    // `checked_div` yields `None` for a zero divisor, covering the zero-column case.
    max_param_count.and_then(|max| max.checked_div(column_count))
}
10461
10462pub fn sql_backend_caps<C: SqlConnection>(conn: &C) -> Result<SqlBackendCaps, IoError> {
10464 Ok(SqlBackendCaps {
10465 dialect_name: conn.dialect_name(),
10466 server_version: conn.server_version()?,
10467 supports_returning: conn.supports_returning(),
10468 supports_schemas: conn.supports_schemas(),
10469 max_param_count: conn.max_param_count(),
10470 max_identifier_length: conn.max_identifier_length(),
10471 })
10472}
10473
/// Borrowing handle around a `SqlConnection`; its methods forward metadata
/// queries to the wrapped connection.
#[derive(Debug)]
pub struct SqlInspector<'a, C: SqlConnection> {
    /// The connection every method delegates to.
    conn: &'a C,
}
10499
10500impl<'a, C: SqlConnection> SqlInspector<'a, C> {
10501 #[must_use]
10503 pub fn new(conn: &'a C) -> Self {
10504 Self { conn }
10505 }
10506
10507 pub fn tables(&self, schema: Option<&str>) -> Result<Vec<String>, IoError> {
10509 self.conn.list_tables(schema)
10510 }
10511
10512 pub fn views(&self, schema: Option<&str>) -> Result<Vec<String>, IoError> {
10514 self.conn.list_views(schema)
10515 }
10516
10517 pub fn schemas(&self) -> Result<Vec<String>, IoError> {
10519 self.conn.list_schemas()
10520 }
10521
10522 pub fn columns(
10524 &self,
10525 table_name: &str,
10526 schema: Option<&str>,
10527 ) -> Result<Option<SqlTableSchema>, IoError> {
10528 self.conn.table_schema(table_name, schema)
10529 }
10530
10531 pub fn indexes(
10533 &self,
10534 table_name: &str,
10535 schema: Option<&str>,
10536 ) -> Result<Vec<SqlIndexSchema>, IoError> {
10537 self.conn.list_indexes(table_name, schema)
10538 }
10539
10540 pub fn foreign_keys(
10542 &self,
10543 table_name: &str,
10544 schema: Option<&str>,
10545 ) -> Result<Vec<SqlForeignKeySchema>, IoError> {
10546 self.conn.list_foreign_keys(table_name, schema)
10547 }
10548
10549 pub fn unique_constraints(
10551 &self,
10552 table_name: &str,
10553 schema: Option<&str>,
10554 ) -> Result<Vec<SqlUniqueConstraintSchema>, IoError> {
10555 self.conn.list_unique_constraints(table_name, schema)
10556 }
10557
10558 pub fn primary_key_columns(
10561 &self,
10562 table_name: &str,
10563 schema: Option<&str>,
10564 ) -> Result<Vec<String>, IoError> {
10565 self.conn.primary_key_columns(table_name, schema)
10566 }
10567
10568 pub fn table_comment(
10570 &self,
10571 table_name: &str,
10572 schema: Option<&str>,
10573 ) -> Result<Option<String>, IoError> {
10574 self.conn.table_comment(table_name, schema)
10575 }
10576
10577 pub fn table_exists(&self, table_name: &str, schema: Option<&str>) -> Result<bool, IoError> {
10580 self.conn.table_exists_in_schema(table_name, schema)
10581 }
10582
10583 pub fn server_version(&self) -> Result<Option<String>, IoError> {
10585 self.conn.server_version()
10586 }
10587
10588 #[must_use]
10591 pub fn max_identifier_length(&self) -> Option<usize> {
10592 self.conn.max_identifier_length()
10593 }
10594
10595 #[must_use]
10597 pub fn max_param_count(&self) -> Option<usize> {
10598 self.conn.max_param_count()
10599 }
10600
10601 #[must_use]
10604 pub fn max_insert_rows(&self, column_count: usize) -> Option<usize> {
10605 sql_max_insert_rows_for_columns(self.conn.max_param_count(), column_count)
10606 }
10607
10608 #[must_use]
10610 pub fn supports_returning(&self) -> bool {
10611 self.conn.supports_returning()
10612 }
10613
10614 #[must_use]
10616 pub fn supports_schemas(&self) -> bool {
10617 self.conn.supports_schemas()
10618 }
10619
10620 pub fn backend_caps(&self) -> Result<SqlBackendCaps, IoError> {
10622 sql_backend_caps(self.conn)
10623 }
10624
10625 #[must_use]
10627 pub fn dialect_name(&self) -> &'static str {
10628 self.conn.dialect_name()
10629 }
10630
10631 pub fn has_column(
10639 &self,
10640 table_name: &str,
10641 column_name: &str,
10642 schema: Option<&str>,
10643 ) -> Result<bool, IoError> {
10644 let Some(meta) = self.conn.table_schema(table_name, schema)? else {
10645 return Ok(false);
10646 };
10647 Ok(meta.column(column_name).is_some())
10648 }
10649
10650 pub fn column(
10658 &self,
10659 table_name: &str,
10660 column_name: &str,
10661 schema: Option<&str>,
10662 ) -> Result<Option<SqlColumnSchema>, IoError> {
10663 let Some(meta) = self.conn.table_schema(table_name, schema)? else {
10664 return Ok(None);
10665 };
10666 Ok(meta.column(column_name).cloned())
10667 }
10668
10669 pub fn reflect_table(
10687 &self,
10688 table_name: &str,
10689 schema: Option<&str>,
10690 ) -> Result<Option<SqlReflectedTable>, IoError> {
10691 let Some(meta) = self.conn.table_schema(table_name, schema)? else {
10692 return Ok(None);
10693 };
10694 let primary_key_columns = primary_keys_from_schema(&meta);
10695 let indexes = self.conn.list_indexes(table_name, schema)?;
10696 let foreign_keys = self.conn.list_foreign_keys(table_name, schema)?;
10697 let unique_constraints = self.conn.list_unique_constraints(table_name, schema)?;
10698 let comment = self.conn.table_comment(table_name, schema)?;
10699 Ok(Some(SqlReflectedTable {
10700 table_name: meta.table_name,
10701 columns: meta.columns,
10702 primary_key_columns,
10703 indexes,
10704 foreign_keys,
10705 unique_constraints,
10706 comment,
10707 }))
10708 }
10709
10710 pub fn reflect_all_tables(
10720 &self,
10721 schema: Option<&str>,
10722 ) -> Result<Vec<SqlReflectedTable>, IoError> {
10723 let table_names = self.conn.list_tables(schema)?;
10724 let mut bundles = Vec::with_capacity(table_names.len());
10725 for name in table_names {
10726 if let Some(bundle) = self.reflect_table(&name, schema)? {
10727 bundles.push(bundle);
10728 }
10729 }
10730 Ok(bundles)
10731 }
10732
10733 pub fn reflect_all_views(
10745 &self,
10746 schema: Option<&str>,
10747 ) -> Result<Vec<SqlReflectedTable>, IoError> {
10748 let view_names = self.conn.list_views(schema)?;
10749 let mut bundles = Vec::with_capacity(view_names.len());
10750 for name in view_names {
10751 if let Some(bundle) = self.reflect_table(&name, schema)? {
10752 bundles.push(bundle);
10753 }
10754 }
10755 Ok(bundles)
10756 }
10757}
10758
10759fn primary_keys_from_schema(meta: &SqlTableSchema) -> Vec<String> {
10768 let mut pk: Vec<(usize, String)> = meta
10769 .columns
10770 .iter()
10771 .filter_map(|c| c.primary_key_ordinal.map(|ord| (ord, c.name.clone())))
10772 .collect();
10773 pk.sort_by_key(|(ord, _)| *ord);
10774 pk.into_iter().map(|(_, name)| name).collect()
10775}
10776
/// Shorthand for `SqlInspector::new(conn)`.
#[must_use]
pub fn inspect<C: SqlConnection>(conn: &C) -> SqlInspector<'_, C> {
    SqlInspector::new(conn)
}
10785
10786pub fn read_sql_table_with_options<C: SqlConnection>(
10791 conn: &C,
10792 table_name: &str,
10793 options: &SqlReadOptions,
10794) -> Result<DataFrame, IoError> {
10795 let query =
10796 sql_table_read_query_for_options(conn, table_name, options, options.index_col.as_deref())?;
10797 let cleared = SqlReadOptions {
10802 columns: None,
10803 ..options.clone()
10804 };
10805 read_sql_with_options(conn, &query, &cleared)
10806}
10807
10808fn sql_table_read_query_for_options<C: SqlConnection>(
10809 conn: &C,
10810 table_name: &str,
10811 options: &SqlReadOptions,
10812 required_projection_col: Option<&str>,
10813) -> Result<String, IoError> {
10814 match options.columns.as_deref() {
10821 Some(cols) => {
10822 let mut refs: Vec<&str> = Vec::with_capacity(cols.len() + 1);
10823 if let Some(index_col) = required_projection_col
10824 && !cols.iter().any(|name| name == index_col)
10825 {
10826 refs.push(index_col);
10827 }
10828 refs.extend(cols.iter().map(String::as_str));
10829 sql_select_columns_query_in_schema(conn, table_name, options.schema.as_deref(), &refs)
10830 }
10831 None => sql_select_all_query_in_schema(conn, table_name, options.schema.as_deref()),
10832 }
10833}
10834
10835pub fn read_sql_table_with_options_and_index_col<C: SqlConnection>(
10840 conn: &C,
10841 table_name: &str,
10842 options: &SqlReadOptions,
10843 index_col: Option<&str>,
10844) -> Result<DataFrame, IoError> {
10845 if let Some(col_name) = index_col {
10849 let query = sql_table_read_query_for_options(conn, table_name, options, Some(col_name))?;
10855 let cleared = SqlReadOptions {
10856 index_col: None,
10857 columns: None,
10858 ..options.clone()
10859 };
10860 let frame = read_sql_with_options(conn, &query, &cleared)?;
10861 return apply_sql_index_col(frame, Some(col_name));
10862 }
10863 read_sql_table_with_options(conn, table_name, options)
10864}
10865
10866pub fn read_sql_table_chunks<'conn, C: SqlConnection + 'conn>(
10870 conn: &'conn C,
10871 table_name: &str,
10872 chunk_size: usize,
10873) -> Result<SqlChunkIterator<'conn>, IoError> {
10874 read_sql_chunks(conn, &sql_select_all_query(conn, table_name)?, chunk_size)
10875}
10876
10877pub fn read_sql_table_chunks_with_options<'conn, C: SqlConnection + 'conn>(
10882 conn: &'conn C,
10883 table_name: &str,
10884 options: &SqlReadOptions,
10885 chunk_size: usize,
10886) -> Result<SqlChunkIterator<'conn>, IoError> {
10887 if options.index_col.is_some() {
10894 return Err(IoError::Sql(
10895 "options.index_col is set but this entrypoint returns SqlChunkIterator without \
10896 index promotion; use read_sql_table_chunks_with_options_and_index_col to honor \
10897 index_col"
10898 .to_owned(),
10899 ));
10900 }
10901 let query = match options.columns.as_deref() {
10902 Some(cols) => {
10903 let refs: Vec<&str> = cols.iter().map(String::as_str).collect();
10904 sql_select_columns_query_in_schema(conn, table_name, options.schema.as_deref(), &refs)?
10905 }
10906 None => sql_select_all_query_in_schema(conn, table_name, options.schema.as_deref())?,
10907 };
10908 let cleared = SqlReadOptions {
10911 columns: None,
10912 ..options.clone()
10913 };
10914 read_sql_chunks_with_options(conn, &query, &cleared, chunk_size)
10915}
10916
10917pub fn read_sql_table_chunks_with_options_and_index_col<'conn, C: SqlConnection + 'conn>(
10922 conn: &'conn C,
10923 table_name: &str,
10924 options: &SqlReadOptions,
10925 index_col: Option<&str>,
10926 chunk_size: usize,
10927) -> Result<SqlIndexedChunkIterator<'conn>, IoError> {
10928 let effective_index_col = index_col.or(options.index_col.as_deref());
10929 let query = sql_table_read_query_for_options(conn, table_name, options, effective_index_col)?;
10930 let cleared = SqlReadOptions {
10935 index_col: None,
10936 columns: None,
10937 ..options.clone()
10938 };
10939 let inner = read_sql_chunks_with_options(conn, &query, &cleared, chunk_size)?;
10940 sql_indexed_chunks(inner, effective_index_col)
10941}
10942
10943pub fn read_sql_table_chunks_with_index_col<'conn, C: SqlConnection + 'conn>(
10948 conn: &'conn C,
10949 table_name: &str,
10950 index_col: Option<&str>,
10951 chunk_size: usize,
10952) -> Result<SqlIndexedChunkIterator<'conn>, IoError> {
10953 let inner = read_sql_table_chunks(conn, table_name, chunk_size)?;
10954 sql_indexed_chunks(inner, index_col)
10955}
10956
10957pub fn read_sql_table_columns<C: SqlConnection>(
10966 conn: &C,
10967 table_name: &str,
10968 columns: &[&str],
10969) -> Result<DataFrame, IoError> {
10970 read_sql(conn, &sql_select_columns_query(conn, table_name, columns)?)
10971}
10972
10973pub fn read_sql_table_columns_with_index_col<C: SqlConnection>(
10982 conn: &C,
10983 table_name: &str,
10984 columns: &[&str],
10985 index_col: Option<&str>,
10986) -> Result<DataFrame, IoError> {
10987 let projection = projection_with_index_col(columns, index_col)?;
10988 let frame = read_sql_table_columns(conn, table_name, &projection)?;
10989 apply_sql_index_col(frame, index_col)
10990}
10991
10992pub fn read_sql_table_columns_chunks<'conn, C: SqlConnection + 'conn>(
10999 conn: &'conn C,
11000 table_name: &str,
11001 columns: &[&str],
11002 chunk_size: usize,
11003) -> Result<SqlChunkIterator<'conn>, IoError> {
11004 read_sql_chunks(
11005 conn,
11006 &sql_select_columns_query(conn, table_name, columns)?,
11007 chunk_size,
11008 )
11009}
11010
11011pub fn read_sql_table_columns_chunks_with_index_col<'conn, C: SqlConnection + 'conn>(
11019 conn: &'conn C,
11020 table_name: &str,
11021 columns: &[&str],
11022 index_col: Option<&str>,
11023 chunk_size: usize,
11024) -> Result<SqlIndexedChunkIterator<'conn>, IoError> {
11025 let projection = projection_with_index_col(columns, index_col)?;
11026 let inner = read_sql_table_columns_chunks(conn, table_name, &projection, chunk_size)?;
11027 sql_indexed_chunks(inner, index_col)
11028}
11029
11030fn projection_with_index_col<'a>(
11037 columns: &'a [&'a str],
11038 index_col: Option<&'a str>,
11039) -> Result<Vec<&'a str>, IoError> {
11040 match index_col {
11041 Some("") => Err(IoError::Sql(
11042 "index_col: empty string is not a valid column name".to_owned(),
11043 )),
11044 Some(name) if !columns.is_empty() && !columns.contains(&name) => {
11045 let mut out = Vec::with_capacity(columns.len() + 1);
11046 out.push(name);
11047 out.extend_from_slice(columns);
11048 Ok(out)
11049 }
11050 _ => Ok(columns.to_vec()),
11051 }
11052}
11053
11054pub fn write_sql<C: SqlConnection>(
11058 frame: &DataFrame,
11059 conn: &C,
11060 table_name: &str,
11061 if_exists: SqlIfExists,
11062) -> Result<(), IoError> {
11063 write_sql_with_options(
11064 frame,
11065 conn,
11066 table_name,
11067 &SqlWriteOptions {
11068 if_exists,
11069 index: false,
11070 index_label: None,
11071 schema: None,
11072 dtype: None,
11073 method: SqlInsertMethod::Single,
11074 chunksize: None,
11075 },
11076 )
11077}
11078
11079pub fn write_sql_with_options<C: SqlConnection>(
11084 frame: &DataFrame,
11085 conn: &C,
11086 table_name: &str,
11087 options: &SqlWriteOptions,
11088) -> Result<(), IoError> {
11089 if table_name.is_empty() || !table_name.chars().all(|c| c.is_alphanumeric() || c == '_') {
11091 return Err(IoError::Sql(format!(
11092 "invalid table name: '{table_name}' (must be non-empty, only alphanumeric and underscore allowed)"
11093 )));
11094 }
11095
11096 let col_names: Vec<String> = frame.column_names().into_iter().cloned().collect();
11097 let index_label = resolve_sql_index_label(frame, options)?;
11098 let mut sql_col_names =
11099 Vec::with_capacity(col_names.len() + usize::from(index_label.is_some()));
11100 if let Some(ref label) = index_label {
11101 sql_col_names.push(label.clone());
11102 }
11103 sql_col_names.extend(col_names.iter().cloned());
11104
11105 let max_ident = conn.max_identifier_length();
11109 validate_sql_identifier_length(table_name, max_ident, "table")?;
11110 if let Some(ref label) = index_label {
11111 validate_sql_identifier_length(label, max_ident, "index label")?;
11112 }
11113 for name in &col_names {
11114 validate_sql_identifier_length(name, max_ident, "column")?;
11115 }
11116 if let Some(s) = options.schema.as_deref() {
11117 validate_sql_identifier_length(s, max_ident, "schema")?;
11118 }
11119
11120 let schema = options.schema.as_deref();
11122 match options.if_exists {
11123 SqlIfExists::Fail => {
11124 let exists = conn.table_exists_in_schema(table_name, schema)?;
11125 if exists {
11126 return Err(IoError::Sql(format!("table '{table_name}' already exists")));
11127 }
11128 }
11129 SqlIfExists::Replace => {
11130 let drop_sql = sql_drop_table_query_in_schema(conn, table_name, schema)?;
11131 conn.execute_batch(&drop_sql)?;
11132 }
11133 SqlIfExists::Append => {
11134 }
11136 }
11137
11138 let mut col_defs = Vec::with_capacity(sql_col_names.len());
11140 if let Some(ref label) = index_label {
11141 col_defs.push(sql_column_definition(
11142 conn,
11143 label,
11144 conn.index_dtype_sql(frame.index()),
11145 )?);
11146 }
11147 let dtype_overrides = options.dtype.as_ref();
11148 col_defs.extend(
11149 col_names
11150 .iter()
11151 .map(|name| {
11152 let override_sql = dtype_overrides
11155 .and_then(|m| m.get(name))
11156 .map(|s| s.as_str());
11157 let sql_type = match override_sql {
11158 Some(s) => s,
11159 None => {
11160 let dt = frame.column(name).map_or(DType::Utf8, |c| c.dtype());
11161 conn.dtype_sql(dt)
11162 }
11163 };
11164 sql_column_definition(conn, name, sql_type)
11165 })
11166 .collect::<Result<Vec<_>, IoError>>()?,
11167 );
11168
11169 let create_sql = sql_create_table_query_in_schema(conn, table_name, schema, &col_defs)?;
11170 conn.execute_batch(&create_sql)?;
11171
11172 let nrows = frame.index().len();
11173 let ncols = sql_col_names.len();
11174 let mut rows = Vec::with_capacity(nrows);
11175 for row_idx in 0..nrows {
11176 let mut row = Vec::with_capacity(ncols);
11177 if options.index {
11178 row.push(scalar_from_index_label(&frame.index().labels()[row_idx]));
11179 }
11180 row.extend(col_names.iter().map(|name| {
11181 frame
11182 .column(name)
11183 .and_then(|col| col.value(row_idx))
11184 .cloned()
11185 .unwrap_or(Scalar::Null(NullKind::Null))
11186 }));
11187 rows.push(row);
11188 }
11189
11190 if rows.is_empty() {
11191 return Ok(());
11193 }
11194
11195 if let Some(0) = options.chunksize {
11198 return Err(IoError::Sql(
11199 "invalid chunksize: 0 (must be > 0 if Some)".to_owned(),
11200 ));
11201 }
11202
11203 match options.method {
11204 SqlInsertMethod::Single => {
11205 let insert_sql =
11206 sql_insert_rows_query_in_schema(conn, table_name, schema, &sql_col_names)?;
11207 match options.chunksize {
11208 None => {
11209 conn.insert_rows(&insert_sql, &rows)?;
11210 }
11211 Some(n) => {
11212 for chunk in rows.chunks(n) {
11213 conn.insert_rows(&insert_sql, chunk)?;
11214 }
11215 }
11216 }
11217 }
11218 SqlInsertMethod::Multi => {
11219 let param_chunk = match conn.max_param_count() {
11224 Some(max) if ncols > 0 => {
11225 let per_chunk = max / ncols;
11226 if per_chunk == 0 {
11227 return Err(IoError::Sql(format!(
11228 "multi-row insert: ncols={ncols} exceeds backend max_param_count={max}"
11229 )));
11230 }
11231 per_chunk
11232 }
11233 _ => rows.len(),
11234 };
11235 let chunk_rows = options
11236 .chunksize
11237 .map(|cs| cs.min(param_chunk))
11238 .unwrap_or(param_chunk);
11239 for chunk in rows.chunks(chunk_rows) {
11240 let chunk_sql = sql_multi_row_insert_query_in_schema(
11241 conn,
11242 table_name,
11243 schema,
11244 &sql_col_names,
11245 chunk.len(),
11246 )?;
11247 let mut flat = Vec::with_capacity(chunk.len() * ncols);
11248 for row in chunk {
11249 flat.extend(row.iter().cloned());
11250 }
11251 conn.insert_rows(&chunk_sql, &[flat])?;
11252 }
11253 }
11254 }
11255
11256 Ok(())
11257}
11258
/// Extension trait that adds `to_*` export methods to a frame type.
///
/// Only signatures are declared here; behavior is defined by the implementor.
/// Conventions visible in the signatures:
///
/// * methods taking `path: &Path` return `Result<(), IoError>`;
/// * `*_string` methods return `Result<String, IoError>`;
/// * `*_bytes` methods return `Result<Vec<u8>, IoError>`;
/// * `*_with_options` methods additionally take a format-specific options
///   struct by reference.
///
/// Several formats have both a `to_<fmt>` and a `to_<fmt>_file` method with
/// identical signatures. In the `DataFrame` impl, `to_orc_file` and
/// `to_hdf_file` forward to `to_orc` and `to_hdf`; the other pairs presumably
/// follow the same alias pattern (confirm against the impl).
pub trait DataFrameIoExt {
    // --- Parquet ---
    fn to_parquet(&self, path: &Path) -> Result<(), IoError>;

    fn to_parquet_bytes(&self) -> Result<Vec<u8>, IoError>;

    // --- ORC ---
    fn to_orc(&self, path: &Path) -> Result<(), IoError>;

    fn to_orc_file(&self, path: &Path) -> Result<(), IoError>;

    fn to_orc_bytes(&self) -> Result<Vec<u8>, IoError>;

    // --- HDF ---
    fn to_hdf(&self, path: &Path) -> Result<(), IoError>;

    fn to_hdf_file(&self, path: &Path) -> Result<(), IoError>;

    /// Like `to_hdf`, with an explicit `key`.
    fn to_hdf_key(&self, path: &Path, key: &str) -> Result<(), IoError>;

    fn to_hdf_with_options(&self, path: &Path, options: &HdfWriteOptions) -> Result<(), IoError>;

    // --- CSV ---
    fn to_csv_file(&self, path: &Path) -> Result<(), IoError>;

    fn to_csv_string(&self) -> Result<String, IoError>;

    fn to_csv_string_with_options(&self, options: &CsvWriteOptions) -> Result<String, IoError>;

    // --- Markdown ---
    fn to_markdown_string(&self) -> Result<String, IoError>;

    fn to_markdown_string_with_options(
        &self,
        options: &MarkdownWriteOptions,
    ) -> Result<String, IoError>;

    fn to_markdown_file(&self, path: &Path) -> Result<(), IoError>;

    fn to_markdown_file_with_options(
        &self,
        path: &Path,
        options: &MarkdownWriteOptions,
    ) -> Result<(), IoError>;

    // --- LaTeX ---
    fn to_latex_string(&self) -> Result<String, IoError>;

    fn to_latex_string_with_options(&self, options: &LatexWriteOptions) -> Result<String, IoError>;

    fn to_latex_file(&self, path: &Path) -> Result<(), IoError>;

    fn to_latex_file_with_options(
        &self,
        path: &Path,
        options: &LatexWriteOptions,
    ) -> Result<(), IoError>;

    // --- HTML ---
    fn to_html_string(&self) -> Result<String, IoError>;

    fn to_html_string_with_options(&self, options: &HtmlWriteOptions) -> Result<String, IoError>;

    fn to_html_file(&self, path: &Path) -> Result<(), IoError>;

    fn to_html_file_with_options(
        &self,
        path: &Path,
        options: &HtmlWriteOptions,
    ) -> Result<(), IoError>;

    // --- XML ---
    fn to_xml_string(&self) -> Result<String, IoError>;

    fn to_xml_string_with_options(&self, options: &XmlWriteOptions) -> Result<String, IoError>;

    fn to_xml(&self, path: &Path) -> Result<(), IoError>;

    fn to_xml_file(&self, path: &Path) -> Result<(), IoError>;

    fn to_xml_file_with_options(
        &self,
        path: &Path,
        options: &XmlWriteOptions,
    ) -> Result<(), IoError>;

    // --- JSON (layout selected by `orient`) ---
    fn to_json_file(&self, path: &Path, orient: JsonOrient) -> Result<(), IoError>;

    fn to_json_string(&self, orient: JsonOrient) -> Result<String, IoError>;

    // --- Pickle ---
    fn to_pickle(&self, path: &Path) -> Result<(), IoError>;

    fn to_pickle_file(&self, path: &Path) -> Result<(), IoError>;

    fn to_pickle_with_options(
        &self,
        path: &Path,
        options: &PickleWriteOptions,
    ) -> Result<(), IoError>;

    fn to_pickle_bytes(&self) -> Result<Vec<u8>, IoError>;

    fn to_pickle_bytes_with_options(
        &self,
        options: &PickleWriteOptions,
    ) -> Result<Vec<u8>, IoError>;

    // --- Stata ---
    fn to_stata(&self, path: &Path) -> Result<(), IoError>;

    fn to_stata_file(&self, path: &Path) -> Result<(), IoError>;

    fn to_stata_with_options(
        &self,
        path: &Path,
        options: &StataWriteOptions,
    ) -> Result<(), IoError>;

    fn to_stata_bytes(&self) -> Result<Vec<u8>, IoError>;

    fn to_stata_bytes_with_options(&self, options: &StataWriteOptions) -> Result<Vec<u8>, IoError>;

    // --- Excel ---
    fn to_excel(&self, path: &Path) -> Result<(), IoError>;

    fn to_excel_file(&self, path: &Path) -> Result<(), IoError>;

    fn to_excel_with_options(
        &self,
        path: &Path,
        options: &ExcelWriteOptions,
    ) -> Result<(), IoError>;

    fn to_excel_bytes(&self) -> Result<Vec<u8>, IoError>;

    fn to_excel_bytes_with_options(&self, options: &ExcelWriteOptions) -> Result<Vec<u8>, IoError>;

    // --- JSON Lines ---
    fn to_jsonl_file(&self, path: &Path) -> Result<(), IoError>;

    fn to_jsonl_string(&self) -> Result<String, IoError>;

    // --- Feather ---
    fn to_feather(&self, path: &Path) -> Result<(), IoError>;

    fn to_feather_file(&self, path: &Path) -> Result<(), IoError>;

    fn to_feather_bytes(&self) -> Result<Vec<u8>, IoError>;

    // --- SQL ---
    /// Same parameters as the free function `write_sql`, with `self` in place
    /// of the frame argument.
    fn to_sql<C: SqlConnection>(
        &self,
        conn: &C,
        table_name: &str,
        if_exists: SqlIfExists,
    ) -> Result<(), IoError>;

    /// Same parameters as the free function `write_sql_with_options`, with
    /// `self` in place of the frame argument.
    fn to_sql_with_options<C: SqlConnection>(
        &self,
        conn: &C,
        table_name: &str,
        options: &SqlWriteOptions,
    ) -> Result<(), IoError>;

    // --- Other sinks ---
    fn to_clipboard(&self) -> Result<(), IoError>;

    fn to_gbq(&self, destination_table: &str, project_id: Option<&str>) -> Result<(), IoError>;
}
11540
11541impl DataFrameIoExt for DataFrame {
11542 fn to_parquet(&self, path: &Path) -> Result<(), IoError> {
11543 write_parquet(self, path)
11544 }
11545
11546 fn to_parquet_bytes(&self) -> Result<Vec<u8>, IoError> {
11547 write_parquet_bytes(self)
11548 }
11549
11550 fn to_orc(&self, path: &Path) -> Result<(), IoError> {
11551 write_orc(self, path)
11552 }
11553
11554 fn to_orc_file(&self, path: &Path) -> Result<(), IoError> {
11555 self.to_orc(path)
11556 }
11557
11558 fn to_orc_bytes(&self) -> Result<Vec<u8>, IoError> {
11559 write_orc_bytes(self)
11560 }
11561
11562 fn to_hdf(&self, path: &Path) -> Result<(), IoError> {
11563 write_hdf(self, path)
11564 }
11565
11566 fn to_hdf_file(&self, path: &Path) -> Result<(), IoError> {
11567 self.to_hdf(path)
11568 }
11569
11570 fn to_hdf_key(&self, path: &Path, key: &str) -> Result<(), IoError> {
11571 write_hdf_key(self, path, key)
11572 }
11573
11574 fn to_hdf_with_options(&self, path: &Path, options: &HdfWriteOptions) -> Result<(), IoError> {
11575 write_hdf_with_options(self, path, options)
11576 }
11577
11578 fn to_csv_file(&self, path: &Path) -> Result<(), IoError> {
11579 write_csv(self, path)
11580 }
11581
11582 fn to_csv_string(&self) -> Result<String, IoError> {
11583 write_csv_string(self)
11584 }
11585
11586 fn to_csv_string_with_options(&self, options: &CsvWriteOptions) -> Result<String, IoError> {
11587 write_csv_string_with_options(self, options)
11588 }
11589
11590 fn to_markdown_string(&self) -> Result<String, IoError> {
11591 write_markdown_string(self)
11592 }
11593
11594 fn to_markdown_string_with_options(
11595 &self,
11596 options: &MarkdownWriteOptions,
11597 ) -> Result<String, IoError> {
11598 write_markdown_string_with_options(self, options)
11599 }
11600
11601 fn to_markdown_file(&self, path: &Path) -> Result<(), IoError> {
11602 write_markdown(self, path)
11603 }
11604
11605 fn to_markdown_file_with_options(
11606 &self,
11607 path: &Path,
11608 options: &MarkdownWriteOptions,
11609 ) -> Result<(), IoError> {
11610 write_markdown_with_options(self, path, options)
11611 }
11612
11613 fn to_latex_string(&self) -> Result<String, IoError> {
11614 write_latex_string(self)
11615 }
11616
11617 fn to_latex_string_with_options(&self, options: &LatexWriteOptions) -> Result<String, IoError> {
11618 write_latex_string_with_options(self, options)
11619 }
11620
11621 fn to_latex_file(&self, path: &Path) -> Result<(), IoError> {
11622 write_latex(self, path)
11623 }
11624
11625 fn to_latex_file_with_options(
11626 &self,
11627 path: &Path,
11628 options: &LatexWriteOptions,
11629 ) -> Result<(), IoError> {
11630 write_latex_with_options(self, path, options)
11631 }
11632
11633 fn to_html_string(&self) -> Result<String, IoError> {
11634 write_html_string(self)
11635 }
11636
11637 fn to_html_string_with_options(&self, options: &HtmlWriteOptions) -> Result<String, IoError> {
11638 write_html_string_with_options(self, options)
11639 }
11640
11641 fn to_html_file(&self, path: &Path) -> Result<(), IoError> {
11642 write_html(self, path)
11643 }
11644
11645 fn to_html_file_with_options(
11646 &self,
11647 path: &Path,
11648 options: &HtmlWriteOptions,
11649 ) -> Result<(), IoError> {
11650 write_html_with_options(self, path, options)
11651 }
11652
11653 fn to_xml_string(&self) -> Result<String, IoError> {
11654 write_xml_string(self)
11655 }
11656
11657 fn to_xml_string_with_options(&self, options: &XmlWriteOptions) -> Result<String, IoError> {
11658 write_xml_string_with_options(self, options)
11659 }
11660
11661 fn to_xml(&self, path: &Path) -> Result<(), IoError> {
11662 write_xml(self, path)
11663 }
11664
11665 fn to_xml_file(&self, path: &Path) -> Result<(), IoError> {
11666 self.to_xml(path)
11667 }
11668
11669 fn to_xml_file_with_options(
11670 &self,
11671 path: &Path,
11672 options: &XmlWriteOptions,
11673 ) -> Result<(), IoError> {
11674 write_xml_with_options(self, path, options)
11675 }
11676
11677 fn to_json_file(&self, path: &Path, orient: JsonOrient) -> Result<(), IoError> {
11678 write_json(self, path, orient)
11679 }
11680
11681 fn to_json_string(&self, orient: JsonOrient) -> Result<String, IoError> {
11682 write_json_string(self, orient)
11683 }
11684
11685 fn to_pickle(&self, path: &Path) -> Result<(), IoError> {
11686 write_pickle(self, path)
11687 }
11688
11689 fn to_pickle_file(&self, path: &Path) -> Result<(), IoError> {
11690 self.to_pickle(path)
11691 }
11692
11693 fn to_pickle_with_options(
11694 &self,
11695 path: &Path,
11696 options: &PickleWriteOptions,
11697 ) -> Result<(), IoError> {
11698 write_pickle_with_options(self, path, options)
11699 }
11700
11701 fn to_pickle_bytes(&self) -> Result<Vec<u8>, IoError> {
11702 write_pickle_bytes(self)
11703 }
11704
11705 fn to_pickle_bytes_with_options(
11706 &self,
11707 options: &PickleWriteOptions,
11708 ) -> Result<Vec<u8>, IoError> {
11709 write_pickle_bytes_with_options(self, options)
11710 }
11711
11712 fn to_stata(&self, path: &Path) -> Result<(), IoError> {
11713 write_stata(self, path)
11714 }
11715
11716 fn to_stata_file(&self, path: &Path) -> Result<(), IoError> {
11717 self.to_stata(path)
11718 }
11719
11720 fn to_stata_with_options(
11721 &self,
11722 path: &Path,
11723 options: &StataWriteOptions,
11724 ) -> Result<(), IoError> {
11725 write_stata_with_options(self, path, options)
11726 }
11727
11728 fn to_stata_bytes(&self) -> Result<Vec<u8>, IoError> {
11729 write_stata_bytes(self)
11730 }
11731
11732 fn to_stata_bytes_with_options(&self, options: &StataWriteOptions) -> Result<Vec<u8>, IoError> {
11733 write_stata_bytes_with_options(self, options)
11734 }
11735
11736 fn to_excel(&self, path: &Path) -> Result<(), IoError> {
11737 write_excel(self, path)
11738 }
11739
11740 fn to_excel_file(&self, path: &Path) -> Result<(), IoError> {
11741 self.to_excel(path)
11742 }
11743
11744 fn to_excel_with_options(
11745 &self,
11746 path: &Path,
11747 options: &ExcelWriteOptions,
11748 ) -> Result<(), IoError> {
11749 write_excel_with_options(self, path, options)
11750 }
11751
11752 fn to_excel_bytes(&self) -> Result<Vec<u8>, IoError> {
11753 write_excel_bytes(self)
11754 }
11755
11756 fn to_excel_bytes_with_options(&self, options: &ExcelWriteOptions) -> Result<Vec<u8>, IoError> {
11757 write_excel_bytes_with_options(self, options)
11758 }
11759
11760 fn to_jsonl_file(&self, path: &Path) -> Result<(), IoError> {
11761 write_jsonl(self, path)
11762 }
11763
11764 fn to_jsonl_string(&self) -> Result<String, IoError> {
11765 write_jsonl_string(self)
11766 }
11767
11768 fn to_feather(&self, path: &Path) -> Result<(), IoError> {
11769 write_feather(self, path)
11770 }
11771
11772 fn to_feather_file(&self, path: &Path) -> Result<(), IoError> {
11773 self.to_feather(path)
11774 }
11775
11776 fn to_feather_bytes(&self) -> Result<Vec<u8>, IoError> {
11777 write_feather_bytes(self)
11778 }
11779
11780 fn to_sql<C: SqlConnection>(
11781 &self,
11782 conn: &C,
11783 table_name: &str,
11784 if_exists: SqlIfExists,
11785 ) -> Result<(), IoError> {
11786 write_sql(self, conn, table_name, if_exists)
11787 }
11788
11789 fn to_sql_with_options<C: SqlConnection>(
11790 &self,
11791 conn: &C,
11792 table_name: &str,
11793 options: &SqlWriteOptions,
11794 ) -> Result<(), IoError> {
11795 write_sql_with_options(self, conn, table_name, options)
11796 }
11797
11798 fn to_clipboard(&self) -> Result<(), IoError> {
11799 let _ = self;
11800 Err(deferred_writer_error(
11801 "to_clipboard",
11802 "OS clipboard access requires GUI bindings outside FrankenPandas's headless charter",
11803 ))
11804 }
11805
11806 fn to_gbq(&self, _destination_table: &str, _project_id: Option<&str>) -> Result<(), IoError> {
11807 let _ = self;
11808 Err(deferred_writer_error(
11809 "to_gbq",
11810 "Google BigQuery integration is outside FrankenPandas's local file-format scope",
11811 ))
11812 }
11813}
11814
11815pub trait SeriesIoExt {
11827 fn to_pickle(&self, path: &Path) -> Result<(), IoError>;
11832
11833 fn to_pickle_file(&self, path: &Path) -> Result<(), IoError>;
11837
11838 fn to_pickle_with_options(
11840 &self,
11841 path: &Path,
11842 options: &PickleWriteOptions,
11843 ) -> Result<(), IoError>;
11844
11845 fn to_pickle_bytes(&self) -> Result<Vec<u8>, IoError>;
11847
11848 fn to_pickle_bytes_with_options(
11850 &self,
11851 options: &PickleWriteOptions,
11852 ) -> Result<Vec<u8>, IoError>;
11853
11854 fn to_csv_file(&self, path: &Path) -> Result<(), IoError>;
11859
11860 fn to_csv_file_with_options(
11862 &self,
11863 path: &Path,
11864 options: &CsvWriteOptions,
11865 ) -> Result<(), IoError>;
11866
11867 fn to_csv_string(&self) -> Result<String, IoError>;
11872
11873 fn to_csv_string_with_options(&self, options: &CsvWriteOptions) -> Result<String, IoError>;
11875
11876 fn to_markdown_string(&self) -> Result<String, IoError>;
11881
11882 fn to_markdown_string_with_options(
11884 &self,
11885 options: &MarkdownWriteOptions,
11886 ) -> Result<String, IoError>;
11887
11888 fn to_markdown_file(&self, path: &Path) -> Result<(), IoError>;
11893
11894 fn to_markdown_file_with_options(
11896 &self,
11897 path: &Path,
11898 options: &MarkdownWriteOptions,
11899 ) -> Result<(), IoError>;
11900
11901 fn to_latex_string(&self) -> Result<String, IoError>;
11906
11907 fn to_latex_string_with_options(&self, options: &LatexWriteOptions) -> Result<String, IoError>;
11909
11910 fn to_latex_file(&self, path: &Path) -> Result<(), IoError>;
11915
11916 fn to_latex_file_with_options(
11918 &self,
11919 path: &Path,
11920 options: &LatexWriteOptions,
11921 ) -> Result<(), IoError>;
11922
11923 fn to_json_file(&self, path: &Path, orient: &str) -> Result<(), IoError>;
11928
11929 fn to_json_string(&self, orient: &str) -> Result<String, IoError>;
11934
11935 fn to_hdf(&self, path: &Path) -> Result<(), IoError>;
11940
11941 fn to_hdf_file(&self, path: &Path) -> Result<(), IoError>;
11945
11946 fn to_hdf_key(&self, path: &Path, key: &str) -> Result<(), IoError>;
11948
11949 fn to_hdf_with_options(&self, path: &Path, options: &HdfWriteOptions) -> Result<(), IoError>;
11951
11952 fn to_excel(&self, path: &Path) -> Result<(), IoError>;
11957
11958 fn to_excel_file(&self, path: &Path) -> Result<(), IoError>;
11962
11963 fn to_excel_with_options(
11965 &self,
11966 path: &Path,
11967 options: &ExcelWriteOptions,
11968 ) -> Result<(), IoError>;
11969
11970 fn to_excel_bytes(&self) -> Result<Vec<u8>, IoError>;
11972
11973 fn to_excel_bytes_with_options(&self, options: &ExcelWriteOptions) -> Result<Vec<u8>, IoError>;
11975
11976 fn to_sql<C: SqlConnection>(
11981 &self,
11982 conn: &C,
11983 table_name: &str,
11984 if_exists: SqlIfExists,
11985 ) -> Result<(), IoError>;
11986
11987 fn to_sql_with_options<C: SqlConnection>(
11989 &self,
11990 conn: &C,
11991 table_name: &str,
11992 options: &SqlWriteOptions,
11993 ) -> Result<(), IoError>;
11994
11995 fn to_clipboard(&self) -> Result<(), IoError>;
11997}
11998
11999impl SeriesIoExt for Series {
12000 fn to_pickle(&self, path: &Path) -> Result<(), IoError> {
12001 write_pickle(&self.to_frame(None)?, path)
12002 }
12003
12004 fn to_pickle_file(&self, path: &Path) -> Result<(), IoError> {
12005 self.to_pickle(path)
12006 }
12007
12008 fn to_pickle_with_options(
12009 &self,
12010 path: &Path,
12011 options: &PickleWriteOptions,
12012 ) -> Result<(), IoError> {
12013 write_pickle_with_options(&self.to_frame(None)?, path, options)
12014 }
12015
12016 fn to_pickle_bytes(&self) -> Result<Vec<u8>, IoError> {
12017 write_pickle_bytes(&self.to_frame(None)?)
12018 }
12019
12020 fn to_pickle_bytes_with_options(
12021 &self,
12022 options: &PickleWriteOptions,
12023 ) -> Result<Vec<u8>, IoError> {
12024 write_pickle_bytes_with_options(&self.to_frame(None)?, options)
12025 }
12026
12027 fn to_csv_file(&self, path: &Path) -> Result<(), IoError> {
12028 self.to_csv_file_with_options(
12029 path,
12030 &CsvWriteOptions {
12031 include_index: true,
12032 ..CsvWriteOptions::default()
12033 },
12034 )
12035 }
12036
12037 fn to_csv_file_with_options(
12038 &self,
12039 path: &Path,
12040 options: &CsvWriteOptions,
12041 ) -> Result<(), IoError> {
12042 std::fs::write(path, self.to_csv_string_with_options(options)?)?;
12043 Ok(())
12044 }
12045
12046 fn to_csv_string(&self) -> Result<String, IoError> {
12047 self.to_csv_string_with_options(&CsvWriteOptions {
12048 include_index: true,
12049 ..CsvWriteOptions::default()
12050 })
12051 }
12052
12053 fn to_csv_string_with_options(&self, options: &CsvWriteOptions) -> Result<String, IoError> {
12054 write_csv_string_with_options(&self.to_frame(None)?, options)
12055 }
12056
12057 fn to_markdown_string(&self) -> Result<String, IoError> {
12058 self.to_markdown_string_with_options(&MarkdownWriteOptions::default())
12059 }
12060
12061 fn to_markdown_string_with_options(
12062 &self,
12063 options: &MarkdownWriteOptions,
12064 ) -> Result<String, IoError> {
12065 write_markdown_string_with_options(&self.to_frame(None)?, options)
12066 }
12067
12068 fn to_markdown_file(&self, path: &Path) -> Result<(), IoError> {
12069 self.to_markdown_file_with_options(path, &MarkdownWriteOptions::default())
12070 }
12071
12072 fn to_markdown_file_with_options(
12073 &self,
12074 path: &Path,
12075 options: &MarkdownWriteOptions,
12076 ) -> Result<(), IoError> {
12077 write_markdown_with_options(&self.to_frame(None)?, path, options)
12078 }
12079
12080 fn to_latex_string(&self) -> Result<String, IoError> {
12081 self.to_latex_string_with_options(&LatexWriteOptions::default())
12082 }
12083
12084 fn to_latex_string_with_options(&self, options: &LatexWriteOptions) -> Result<String, IoError> {
12085 write_latex_string_with_options(&self.to_frame(None)?, options)
12086 }
12087
12088 fn to_latex_file(&self, path: &Path) -> Result<(), IoError> {
12089 self.to_latex_file_with_options(path, &LatexWriteOptions::default())
12090 }
12091
12092 fn to_latex_file_with_options(
12093 &self,
12094 path: &Path,
12095 options: &LatexWriteOptions,
12096 ) -> Result<(), IoError> {
12097 write_latex_with_options(&self.to_frame(None)?, path, options)
12098 }
12099
12100 fn to_json_file(&self, path: &Path, orient: &str) -> Result<(), IoError> {
12101 std::fs::write(path, self.to_json_string(orient)?)?;
12102 Ok(())
12103 }
12104
12105 fn to_json_string(&self, orient: &str) -> Result<String, IoError> {
12106 Ok(Series::to_json(self, orient)?)
12107 }
12108
12109 fn to_hdf(&self, path: &Path) -> Result<(), IoError> {
12110 write_hdf(&self.to_frame(None)?, path)
12111 }
12112
12113 fn to_hdf_file(&self, path: &Path) -> Result<(), IoError> {
12114 self.to_hdf(path)
12115 }
12116
12117 fn to_hdf_key(&self, path: &Path, key: &str) -> Result<(), IoError> {
12118 write_hdf_key(&self.to_frame(None)?, path, key)
12119 }
12120
12121 fn to_hdf_with_options(&self, path: &Path, options: &HdfWriteOptions) -> Result<(), IoError> {
12122 write_hdf_with_options(&self.to_frame(None)?, path, options)
12123 }
12124
12125 fn to_excel(&self, path: &Path) -> Result<(), IoError> {
12126 write_excel(&self.to_frame(None)?, path)
12127 }
12128
12129 fn to_excel_file(&self, path: &Path) -> Result<(), IoError> {
12130 self.to_excel(path)
12131 }
12132
12133 fn to_excel_with_options(
12134 &self,
12135 path: &Path,
12136 options: &ExcelWriteOptions,
12137 ) -> Result<(), IoError> {
12138 write_excel_with_options(&self.to_frame(None)?, path, options)
12139 }
12140
12141 fn to_excel_bytes(&self) -> Result<Vec<u8>, IoError> {
12142 write_excel_bytes(&self.to_frame(None)?)
12143 }
12144
12145 fn to_excel_bytes_with_options(&self, options: &ExcelWriteOptions) -> Result<Vec<u8>, IoError> {
12146 write_excel_bytes_with_options(&self.to_frame(None)?, options)
12147 }
12148
12149 fn to_sql<C: SqlConnection>(
12150 &self,
12151 conn: &C,
12152 table_name: &str,
12153 if_exists: SqlIfExists,
12154 ) -> Result<(), IoError> {
12155 write_sql_with_options(
12156 &self.to_frame(None)?,
12157 conn,
12158 table_name,
12159 &SqlWriteOptions {
12160 if_exists,
12161 index: true,
12162 index_label: None,
12163 schema: None,
12164 dtype: None,
12165 method: SqlInsertMethod::Single,
12166 chunksize: None,
12167 },
12168 )
12169 }
12170
12171 fn to_sql_with_options<C: SqlConnection>(
12172 &self,
12173 conn: &C,
12174 table_name: &str,
12175 options: &SqlWriteOptions,
12176 ) -> Result<(), IoError> {
12177 write_sql_with_options(&self.to_frame(None)?, conn, table_name, options)
12178 }
12179
12180 fn to_clipboard(&self) -> Result<(), IoError> {
12181 let _ = self;
12182 Err(deferred_writer_error(
12183 "to_clipboard",
12184 "OS clipboard access requires GUI bindings outside FrankenPandas's headless charter",
12185 ))
12186 }
12187}
12188
12189#[cfg(test)]
12190mod tests {
12191 use std::collections::BTreeMap;
12192
12193 use arrow::{
12194 array::{Array, Int64Array},
12195 datatypes::DataType as ArrowDataType,
12196 };
12197 use fp_columnar::Column;
12198 use fp_frame::{DataFrame, Series};
12199 use fp_index::{Index, IndexLabel};
12200 use fp_types::{DType, NullKind, Scalar};
12201
12202 use super::{
12203 CsvWriteOptions, ExcelReadOptions, ExcelWriteOptions, HtmlReadOptions, HtmlWriteOptions,
12204 IoError, JsonOrient, LatexWriteOptions, MarkdownWriteOptions, PickleProtocol,
12205 PickleWriteOptions, StataWriteOptions, XmlReadOptions, XmlWriteOptions,
12206 format_pandas_float, read_csv_str, read_csv_with_index_cols, read_excel_bytes,
12207 read_feather_bytes, read_html, read_html_str, read_html_str_with_options, read_json_str,
12208 read_orc, read_orc_bytes, read_parquet_bytes, read_pickle, read_pickle_bytes, read_stata,
12209 read_stata_bytes, read_xml, read_xml_str, read_xml_str_with_options, write_csv_string,
12210 write_csv_string_with_options, write_excel_bytes, write_html, write_html_string,
12211 write_html_string_with_options, write_json_string, write_jsonl_string, write_latex,
12212 write_latex_string, write_latex_string_with_options, write_latex_with_options,
12213 write_markdown, write_markdown_string, write_markdown_string_with_options,
12214 write_markdown_with_options, write_orc, write_orc_bytes, write_pickle, write_pickle_bytes,
12215 write_stata, write_stata_bytes, write_stata_bytes_with_options, write_xml,
12216 write_xml_string, write_xml_string_with_options,
12217 };
12218 #[cfg(feature = "hdf5")]
12219 use super::{
12220 HdfReadOptions, HdfWriteOptions, read_hdf, read_hdf_key, read_hdf_with_options, write_hdf,
12221 write_hdf_key, write_hdf_with_options,
12222 };
12223
/// Reading CSV with an empty numeric cell yields a NaN null, and writing the
/// frame back keeps the header and the float row.
#[test]
fn csv_round_trip_preserves_null_and_numeric_shape() {
    let parsed = read_csv_str("id,value\n1,10\n2,\n3,3.5\n").expect("read");

    // Second data row has an empty `value` cell.
    let values = parsed.column("value").expect("value");
    assert_eq!(values.values()[1], Scalar::Null(NullKind::NaN));

    let rendered = write_csv_string(&parsed).expect("write");
    for needle in ["id,value", "3,3.5"] {
        assert!(rendered.contains(needle));
    }
}
12236
/// An all-integer column stays `Int64`; a column mixing `0.5` and `3` becomes
/// `Float64` with the integer widened.
#[test]
fn csv_numeric_fast_path_preserves_default_dtypes_and_values() {
    let parsed = read_csv_str("i,f\n1,0.5\n2,3\n").expect("read");

    let ints = parsed.column("i").expect("i");
    let expected_ints = [Scalar::Int64(1), Scalar::Int64(2)];
    assert_eq!(ints.dtype(), DType::Int64);
    assert_eq!(ints.values(), &expected_ints);

    let floats = parsed.column("f").expect("f");
    let expected_floats = [Scalar::Float64(0.5), Scalar::Float64(3.0)];
    assert_eq!(floats.dtype(), DType::Float64);
    assert_eq!(floats.values(), &expected_floats);
}
12253
/// A column containing a non-numeric cell is read as `Utf8`, and the padded
/// `" 1 "` cell keeps its surrounding spaces.
#[test]
fn csv_numeric_probe_falls_back_to_preserve_object_raw_text() {
    let parsed = read_csv_str("x\n 1 \nabc\n").expect("read");
    let cells = parsed.column("x").expect("x");

    let expected = [
        Scalar::Utf8(" 1 ".to_owned()),
        Scalar::Utf8("abc".to_owned()),
    ];
    assert_eq!(cells.dtype(), DType::Utf8);
    assert_eq!(cells.values(), &expected);
}
12269
/// `True`, `FALSE`, `true` and `false` all parse to boolean scalars.
#[test]
fn csv_parses_boolean_true_false_case_insensitive() {
    let parsed = read_csv_str("flag\nTrue\nFALSE\ntrue\nfalse\n").expect("read");
    let flags = parsed.column("flag").expect("flag");

    // One expected value per data row, in input order.
    let expected = [true, false, true, false];
    for (row, &want) in expected.iter().enumerate() {
        assert_eq!(flags.values()[row], Scalar::Bool(want));
    }
}
12280
/// A header that repeats a column name is rejected with
/// `IoError::DuplicateColumnName` carrying that name.
#[test]
fn csv_duplicate_headers_error() {
    let outcome = read_csv_str("a,a\n1,2\n");
    let err = outcome.expect_err("duplicate header");

    let names_column_a = matches!(err, IoError::DuplicateColumnName(name) if name == "a");
    assert!(names_column_a);
}
12287
/// A data row with fewer fields than the header must surface as
/// `IoError::Csv` instead of being accepted.
#[test]
fn csv_ragged_row_returns_error_4hpid() {
    // The second data row has two fields against a three-column header.
    let input = "a,b,c\n1,2,3\n4,5\n7,8,9\n";

    let err = read_csv_str(input).expect_err("short row must reject");

    let is_csv_error = matches!(err, IoError::Csv(_));
    assert!(
        is_csv_error,
        "expected IoError::Csv (UnequalLengths from csv crate), got {err:?}"
    );
}
12302
12303 fn make_table_format_dataframe() -> DataFrame {
12304 let mut columns = BTreeMap::new();
12305 columns.insert(
12306 "name".to_owned(),
12307 Column::from_values(vec![
12308 Scalar::Utf8("A|B".to_owned()),
12309 Scalar::Utf8("under_score".to_owned()),
12310 ])
12311 .expect("name column"),
12312 );
12313 columns.insert(
12314 "value".to_owned(),
12315 Column::from_values(vec![Scalar::Float64(f64::NAN), Scalar::Int64(2)])
12316 .expect("value column"),
12317 );
12318
12319 let index = Index::new(vec![
12320 IndexLabel::Utf8("r&1".to_owned()),
12321 IndexLabel::Utf8("r_2".to_owned()),
12322 ])
12323 .set_name("row");
12324 DataFrame::new_with_column_order(
12325 index,
12326 columns,
12327 vec!["name".to_owned(), "value".to_owned()],
12328 )
12329 .expect("table format frame")
12330 }
12331
/// Default Markdown output: the index column is headed by the index name,
/// NaN is spelled out, and a `|` inside a cell is backslash-escaped.
#[test]
fn markdown_table_writer_includes_index_missing_values_and_escaping() {
    let expected = concat!(
        "| row | name | value |\n",
        "| --- | --- | --- |\n",
        "| r&1 | A\\|B | NaN |\n",
        "| r_2 | under_score | 2 |\n",
    );

    let frame = make_table_format_dataframe();
    let rendered = write_markdown_string(&frame).expect("markdown");

    assert_eq!(rendered, expected);
}
12348
/// With `include_index: false` the index column (and its label) is dropped,
/// and `na_rep` replaces the NaN cell text.
#[test]
fn markdown_table_writer_options_can_omit_index_and_override_na() {
    let frame = make_table_format_dataframe();
    let options = MarkdownWriteOptions {
        include_index: false,
        na_rep: "<missing>".to_owned(),
        // Expected to have no effect because the index is omitted.
        index_label: Some("ignored".to_owned()),
    };

    let rendered = write_markdown_string_with_options(&frame, &options).expect("markdown");

    let expected = concat!(
        "| name | value |\n",
        "| --- | --- |\n",
        "| A\\|B | <missing> |\n",
        "| under_score | 2 |\n",
    );
    assert_eq!(rendered, expected);
}
12373
/// LaTeX output with escaping on: booktabs rules are emitted, the custom
/// index label gets its own header row, and `&` / `_` are backslash-escaped.
#[test]
fn latex_table_writer_emits_booktabs_and_supports_escaping() {
    let frame = make_table_format_dataframe();
    let options = LatexWriteOptions {
        include_index: true,
        na_rep: "NA".to_owned(),
        index_label: Some("row_id".to_owned()),
        escape: true,
    };

    let rendered = write_latex_string_with_options(&frame, &options).expect("latex");

    let expected = concat!(
        "\\begin{tabular}{lll}\n",
        "\\toprule\n",
        " & name & value \\\\\n",
        "row\\_id & & \\\\\n",
        "\\midrule\n",
        "r\\&1 & A|B & NA \\\\\n",
        "r\\_2 & under\\_score & 2.000000 \\\\\n",
        "\\bottomrule\n",
        "\\end{tabular}\n",
    );
    assert_eq!(rendered, expected);
}
12404
/// Float cells in LaTeX output are rendered with six decimal places, and
/// infinity is rendered as `inf`.
#[test]
fn to_latex_floats_use_six_decimal_places_like_pandas() {
    let floats = Column::new(
        DType::Float64,
        vec![
            Scalar::Float64(1.0),
            Scalar::Float64(-2.5),
            Scalar::Float64(0.1234567),
            Scalar::Float64(f64::INFINITY),
        ],
    )
    .expect("col");
    let columns = BTreeMap::from([("a".to_string(), floats)]);
    let index = Index::from_i64((0..4).collect());
    let frame =
        DataFrame::new_with_column_order(index, columns, vec!["a".to_string()]).expect("frame");

    let options = LatexWriteOptions {
        include_index: false,
        na_rep: "NaN".to_owned(),
        index_label: None,
        escape: true,
    };
    let out = write_latex_string_with_options(&frame, &options).expect("latex");

    // Each needle is a rendered cell followed by the LaTeX row terminator.
    for needle in [
        "1.000000 \\\\",
        "-2.500000 \\\\",
        "0.123457 \\\\",
        "inf \\\\",
    ] {
        assert!(out.contains(needle), "got: {out}");
    }
}
12438
/// The path-based Markdown and LaTeX writers must produce exactly the text
/// returned by their string counterparts.
///
/// Scratch files are removed when the test ends, including when an assertion
/// panics, so repeated runs do not accumulate files in the temp directory.
#[test]
fn markdown_latex_file_writers_match_string_outputs() {
    /// Deletes the wrapped path (best effort) when dropped.
    struct ScratchFile(std::path::PathBuf);

    impl Drop for ScratchFile {
        fn drop(&mut self) {
            // The file may not exist if the writer failed; ignore that.
            let _ = std::fs::remove_file(&self.0);
        }
    }

    let frame = make_table_format_dataframe();
    let markdown_path = ScratchFile(std::env::temp_dir().join(format!(
        "fp_io_markdown_writer_{}_{}.md",
        std::process::id(),
        line!()
    )));
    let latex_path = ScratchFile(std::env::temp_dir().join(format!(
        "fp_io_latex_writer_{}_{}.tex",
        std::process::id(),
        line!()
    )));

    write_markdown(&frame, &markdown_path.0).expect("write markdown path");
    write_latex(&frame, &latex_path.0).expect("write latex path");

    assert_eq!(
        std::fs::read_to_string(&markdown_path.0).expect("read markdown path"),
        write_markdown_string(&frame).expect("markdown string")
    );
    assert_eq!(
        std::fs::read_to_string(&latex_path.0).expect("read latex path"),
        write_latex_string(&frame).expect("latex string")
    );
}
12465
/// The `DataFrameIoExt` Markdown/LaTeX methods and the free `*_with_options`
/// file writers must agree with the corresponding string renderers, both with
/// explicit options and with defaults.
///
/// Scratch files are removed when the test ends, including when an assertion
/// panics, so repeated runs do not accumulate files in the temp directory.
#[test]
fn markdown_latex_trait_aliases_forward_options() {
    use super::DataFrameIoExt;

    /// Deletes the wrapped path (best effort) when dropped.
    struct ScratchFile(std::path::PathBuf);

    impl Drop for ScratchFile {
        fn drop(&mut self) {
            // The file may not exist if the writer failed; ignore that.
            let _ = std::fs::remove_file(&self.0);
        }
    }

    let frame = make_table_format_dataframe();
    let markdown_options = MarkdownWriteOptions {
        include_index: false,
        na_rep: "NA".to_owned(),
        index_label: Some("ignored".to_owned()),
    };
    let latex_options = LatexWriteOptions {
        include_index: false,
        na_rep: "NA".to_owned(),
        index_label: Some("ignored".to_owned()),
        escape: true,
    };

    // Stage 1: trait methods with explicit options.
    let markdown_path = ScratchFile(std::env::temp_dir().join(format!(
        "fp_io_markdown_trait_{}_{}.md",
        std::process::id(),
        line!()
    )));
    let latex_path = ScratchFile(std::env::temp_dir().join(format!(
        "fp_io_latex_trait_{}_{}.tex",
        std::process::id(),
        line!()
    )));

    frame
        .to_markdown_file_with_options(&markdown_path.0, &markdown_options)
        .expect("trait markdown file");
    frame
        .to_latex_file_with_options(&latex_path.0, &latex_options)
        .expect("trait latex file");

    assert_eq!(
        frame
            .to_markdown_string_with_options(&markdown_options)
            .expect("trait markdown options"),
        std::fs::read_to_string(&markdown_path.0).expect("read markdown trait path")
    );
    assert_eq!(
        frame
            .to_latex_string_with_options(&latex_options)
            .expect("trait latex options"),
        std::fs::read_to_string(&latex_path.0).expect("read latex trait path")
    );

    // Stage 2: trait methods with default options.
    let default_markdown_path = ScratchFile(std::env::temp_dir().join(format!(
        "fp_io_markdown_trait_default_{}_{}.md",
        std::process::id(),
        line!()
    )));
    let default_latex_path = ScratchFile(std::env::temp_dir().join(format!(
        "fp_io_latex_trait_default_{}_{}.tex",
        std::process::id(),
        line!()
    )));
    frame
        .to_markdown_file(&default_markdown_path.0)
        .expect("trait markdown default file");
    frame
        .to_latex_file(&default_latex_path.0)
        .expect("trait latex default file");

    assert_eq!(
        std::fs::read_to_string(&default_markdown_path.0).expect("read markdown default"),
        write_markdown_string(&frame).expect("markdown default")
    );
    assert_eq!(
        std::fs::read_to_string(&default_latex_path.0).expect("read latex default"),
        write_latex_string(&frame).expect("latex default")
    );

    // Stage 3: free functions with explicit options.
    let free_markdown_path = ScratchFile(std::env::temp_dir().join(format!(
        "fp_io_markdown_free_options_{}_{}.md",
        std::process::id(),
        line!()
    )));
    let free_latex_path = ScratchFile(std::env::temp_dir().join(format!(
        "fp_io_latex_free_options_{}_{}.tex",
        std::process::id(),
        line!()
    )));
    write_markdown_with_options(&frame, &free_markdown_path.0, &markdown_options)
        .expect("free markdown options file");
    write_latex_with_options(&frame, &free_latex_path.0, &latex_options)
        .expect("free latex options file");
    assert_eq!(
        std::fs::read_to_string(&free_markdown_path.0).expect("read free markdown options"),
        write_markdown_string_with_options(&frame, &markdown_options)
            .expect("free markdown options string")
    );
    assert_eq!(
        std::fs::read_to_string(&free_latex_path.0).expect("read free latex options"),
        write_latex_string_with_options(&frame, &latex_options)
            .expect("free latex options string")
    );
}
12564
/// The default HTML writer output equals `DataFrame::to_html(true)` and
/// therefore contains the index header, the cell text and the NaN cell.
#[test]
fn html_table_writer_defaults_to_index_and_reuses_dataframe_formatter() {
    let frame = make_table_format_dataframe();

    let rendered = write_html_string(&frame).expect("html");

    assert_eq!(rendered, frame.to_html(true));
    for needle in ["<th>r&1</th>", "<td>A|B</td>", "<td>NaN</td>"] {
        assert!(rendered.contains(needle));
    }
}
12576
/// With `include_index: false` the output equals `DataFrame::to_html(false)`:
/// the index header cell is gone while data cells remain.
#[test]
fn html_table_writer_options_can_omit_index() {
    let frame = make_table_format_dataframe();
    let without_index = HtmlWriteOptions {
        include_index: false,
        ..HtmlWriteOptions::default()
    };

    let rendered = write_html_string_with_options(&frame, &without_index).expect("html");

    assert_eq!(rendered, frame.to_html(false));
    assert!(!rendered.contains("<th>r&1</th>"));
    assert!(rendered.contains("<td>A|B</td>"));
}
12594
/// Exercises every `HtmlWriteOptions` field at once: extra classes, table id,
/// zero border, header justification, custom `na_rep`, escaping and link
/// rendering.
#[test]
fn html_table_writer_supports_pandas_pure_string_options_u892h() {
    let urls = Column::from_values(vec![
        Scalar::Utf8("https://example.test/a?x=1&y=2".to_owned()),
        Scalar::Utf8("<b>".to_owned()),
    ])
    .expect("url column");
    let values = Column::from_values(vec![Scalar::Null(NullKind::NaN), Scalar::Float64(2.0)])
        .expect("value column");
    let columns = BTreeMap::from([("url&col".to_owned(), urls), ("value".to_owned(), values)]);

    let index = Index::new(vec![
        IndexLabel::Utf8("r&1".to_owned()),
        IndexLabel::Utf8("r2".to_owned()),
    ]);
    let column_order = vec!["url&col".to_owned(), "value".to_owned()];
    let frame = DataFrame::new_with_column_order(index, columns, column_order)
        .expect("html options frame");

    let options = HtmlWriteOptions {
        include_index: true,
        na_rep: "<NA>".to_owned(),
        classes: vec!["table table-sm".to_owned(), "fp".to_owned()],
        table_id: Some("report&1".to_owned()),
        border: Some(0),
        justify: Some("left".to_owned()),
        escape: true,
        render_links: true,
    };
    let out = write_html_string_with_options(&frame, &options).expect("html options");

    assert!(
        out.starts_with("<table class=\"dataframe table table-sm fp\" id=\"report&1\">")
    );
    // `border: Some(0)` is expected to suppress the attribute entirely.
    assert!(!out.contains("border=\""));

    // NOTE(review): `escape: true` is set, yet the fragments below contain raw
    // `&`, `<` and `>`. Confirm these literals have not lost HTML entities
    // (e.g. `&amp;`, `&lt;`, `&gt;`) before relying on this test.
    for needle in [
        "<tr style=\"text-align: left;\">",
        "<th>url&col</th>",
        "<th>r&1</th>",
        "<td><NA></td>",
        "<a href=\"https://example.test/a?x=1&y=2\" target=\"_blank\">https://example.test/a?x=1&y=2</a>",
        "<td><b></td>",
    ] {
        assert!(out.contains(needle));
    }
}
12649
/// With `escape: false` the header, index label, cell text and `na_rep`
/// are expected to appear in the output verbatim.
#[test]
fn html_table_writer_can_disable_escaping_u892h() {
    let raw_column = Column::from_values(vec![
        Scalar::Utf8("<b>".to_owned()),
        Scalar::Null(NullKind::NaN),
    ])
    .expect("raw column");
    let columns = BTreeMap::from([("raw<th>".to_owned(), raw_column)]);
    let row_labels = Index::new(vec![
        IndexLabel::Utf8("r&1".to_owned()),
        IndexLabel::Int64(2),
    ]);
    let frame =
        DataFrame::new_with_column_order(row_labels, columns, vec!["raw<th>".to_owned()])
            .expect("raw html frame");

    let options = HtmlWriteOptions {
        na_rep: "<NA>".to_owned(),
        escape: false,
        ..HtmlWriteOptions::default()
    };
    let rendered = write_html_string_with_options(&frame, &options).expect("raw html options");

    assert!(rendered.contains("<th>raw<th></th>"));
    assert!(rendered.contains("<th>r&1</th>"));
    assert!(rendered.contains("<td><b></td>"));
    assert!(rendered.contains("<td><NA></td>"));
}
12686
/// The file-based HTML writers (free function and `DataFrameIoExt` method)
/// must emit exactly what the corresponding string writers return.
#[test]
fn html_table_writer_file_output_matches_string_output() {
    use super::DataFrameIoExt;

    let frame = make_table_format_dataframe();

    // Default options: free-function file writer versus string writer.
    let default_file_name = format!(
        "fp_io_html_writer_{}_{}.html",
        std::process::id(),
        line!()
    );
    let path = std::env::temp_dir().join(default_file_name);
    write_html(&frame, &path).expect("write html");
    let file_out = std::fs::read_to_string(&path).expect("read html");
    let string_out = write_html_string(&frame).expect("html string");
    assert_eq!(file_out, string_out);

    // Trait alias versus free function, both producing strings.
    let trait_out = frame.to_html_string().expect("trait html string");
    let free_out = write_html_string(&frame).expect("free html string");
    assert_eq!(trait_out, free_out);

    // Custom options routed through the trait's file writer.
    let no_index_file_name = format!(
        "fp_io_html_writer_no_index_{}_{}.html",
        std::process::id(),
        line!()
    );
    let no_index_path = std::env::temp_dir().join(no_index_file_name);
    let no_index_options = HtmlWriteOptions {
        include_index: false,
        ..HtmlWriteOptions::default()
    };
    frame
        .to_html_file_with_options(&no_index_path, &no_index_options)
        .expect("trait html file");
    let no_index_file_out = std::fs::read_to_string(&no_index_path).expect("read trait html");
    let no_index_string_out =
        write_html_string_with_options(&frame, &no_index_options).expect("free html options");
    assert_eq!(no_index_file_out, no_index_string_out);
}
12724
/// Selects the table at index 1 of a two-table document and checks the
/// parsed header names, the typed first-row values, and that the cells
/// absent from the short second row come back as missing.
#[test]
fn html_reader_parses_first_table_headers_and_missing_cells() {
    let document = concat!(
        "<html><body>",
        "<table><tr><td>ignored</td></tr></table>",
        "<table>",
        "<thead><tr><th>name</th><th>value</th><th>flag</th></tr></thead>",
        "<tbody>",
        "<tr><td>A&B</td><td>1</td><td>True</td></tr>",
        "<tr><td>missing</td><td></td></tr>",
        "</tbody>",
        "</table>",
        "</body></html>",
    );
    let options = HtmlReadOptions { table_index: 1 };

    let frame = read_html_str_with_options(document, &options).expect("read second table");

    let header: Vec<&str> = frame
        .column_names()
        .into_iter()
        .map(String::as_str)
        .collect();
    assert_eq!(header, vec!["name", "value", "flag"]);

    let name_values = frame.column("name").expect("name").values();
    assert_eq!(name_values[0], Scalar::Utf8("A&B".to_owned()));

    let first_value = &frame.column("value").expect("value").values()[0];
    assert_eq!(*first_value, Scalar::Int64(1));
    // Second row: empty `<td>` for `value`.
    assert!(frame.column("value").expect("value").values()[1].is_missing());

    let first_flag = &frame.column("flag").expect("flag").values()[0];
    assert_eq!(*first_flag, Scalar::Bool(true));
    // Second row: no `<td>` at all for `flag`.
    let second_flag = &frame.column("flag").expect("flag").values()[1];
    assert!(matches!(second_flag, Scalar::Null(NullKind::Null)));
}
12769
/// Feeds the default writer output back into the reader and checks that the
/// index comes back as an `Unnamed: 0` column next to the data columns.
#[test]
fn html_reader_roundtrips_writer_output_as_columns() {
    let source = make_table_format_dataframe();
    let markup = write_html_string(&source).expect("write html");

    let frame = read_html_str(&markup).expect("read writer html");

    let header: Vec<&str> = frame
        .column_names()
        .into_iter()
        .map(String::as_str)
        .collect();
    assert_eq!(header, vec!["Unnamed: 0", "name", "value"]);

    let index_values = frame.column("Unnamed: 0").expect("index column").values();
    assert_eq!(index_values[0], Scalar::Utf8("r&1".to_owned()));

    let name_values = frame.column("name").expect("name").values();
    assert_eq!(name_values[0], Scalar::Utf8("A|B".to_owned()));

    assert!(frame.column("value").expect("value").values()[0].is_missing());
    let second_value = &frame.column("value").expect("value").values()[1];
    assert_eq!(*second_value, Scalar::Float64(2.0));
}
12799
/// Reading HTML from a file path must yield the same column names and
/// values as reading the identical markup from a string.
#[test]
fn html_reader_path_reader_matches_string_reader() {
    use std::io::Write;

    let html = "<table><tr><th>name</th></tr><tr><td>A</td></tr></table>\n";
    let path = std::env::temp_dir().join(format!(
        "fp_io_html_reader_{}_{}.html",
        std::process::id(),
        line!()
    ));
    // The file name is unique only per pid and line. A fixture left behind by an
    // earlier run that reused this pid would make `create_new` fail, so clear it
    // first (best effort; the file usually does not exist).
    let _ = std::fs::remove_file(&path);
    {
        let mut file = std::fs::OpenOptions::new()
            .write(true)
            .create_new(true)
            .open(&path)
            .expect("create html fixture");
        file.write_all(html.as_bytes()).expect("write html fixture");
        // The handle is closed at the end of this scope, before the file is read back.
    }

    let via_path = read_html(&path);
    // Remove the fixture before unwrapping so it is cleaned up on failure as well.
    let _ = std::fs::remove_file(&path);
    let via_path = via_path.expect("read path html");
    let via_str = read_html_str(html).expect("read string html");

    assert_eq!(via_path.column_names(), via_str.column_names());
    assert_eq!(
        via_path.column("name").expect("path name").values(),
        via_str.column("name").expect("str name").values()
    );
}
12826
/// Three rejection paths of the HTML reader: a document without a table,
/// duplicated header names, and a data row wider than the header.
#[test]
fn html_reader_rejects_no_table_duplicate_headers_and_wide_rows() {
    // No `<table>` element at all.
    let missing_table = read_html_str("<p>no table</p>").expect_err("missing table");
    assert!(matches!(missing_table, IoError::Html(message) if message.contains("no table")));

    // Two header cells share the name `a`.
    let duplicate = "<table><tr><th>a</th><th>a</th></tr><tr><td>1</td><td>2</td></tr></table>";
    let duplicate_outcome = read_html_str(duplicate);
    assert!(matches!(
        duplicate_outcome,
        Err(IoError::DuplicateColumnName(name)) if name == "a"
    ));

    // One header cell but two data cells in the first row.
    let wide = "<table><tr><th>a</th></tr><tr><td>1</td><td>2</td></tr></table>";
    let wide_row = read_html_str(wide).expect_err("wide row");
    assert!(matches!(wide_row, IoError::Html(message) if message.contains("row 0")));
}
12842
/// A frame written to pickle bytes and read back must serialize to the same
/// split-orient JSON as the source frame.
#[test]
fn pickle_bytes_roundtrip_preserves_split_frame_shape() {
    let split_json = r#"{"columns":["name","value","flag"],"index":["r1","r2"],"data":[["alice",1,true],[null,2.5,false]]}"#;
    let source = read_json_str(split_json, JsonOrient::Split).expect("source frame");

    let encoded = write_pickle_bytes(&source).expect("write pickle bytes");
    assert!(!encoded.is_empty());
    let decoded = read_pickle_bytes(&encoded).expect("read pickle bytes");

    // Split-orient JSON serves as the comparison form for both frames.
    let decoded_json = write_json_string(&decoded, JsonOrient::Split).expect("roundtrip json");
    let source_json = write_json_string(&source, JsonOrient::Split).expect("source json");
    assert_eq!(decoded_json, source_json);
}
12860
/// Reading a pickle file by path must give the same frame as reading the
/// file's raw bytes through the bytes reader.
#[test]
fn pickle_path_reader_matches_bytes_reader() {
    let source = make_table_format_dataframe();
    let file_name = format!(
        "fp_io_pickle_reader_{}_{}.pkl",
        std::process::id(),
        line!()
    );
    let path = std::env::temp_dir().join(file_name);

    write_pickle(&source, &path).expect("write pickle path");

    let via_path = read_pickle(&path).expect("read pickle path");
    let raw = std::fs::read(&path).expect("read pickle bytes from path");
    let via_bytes = read_pickle_bytes(&raw).expect("read pickle bytes");

    let path_json = write_json_string(&via_path, JsonOrient::Split).expect("path json");
    let bytes_json = write_json_string(&via_bytes, JsonOrient::Split).expect("bytes json");
    assert_eq!(path_json, bytes_json);
}
12882
/// Pickle bytes written with protocol V2 through the trait must read back to
/// the source frame, and the trait's default writer must match the free function.
#[test]
fn pickle_protocol_v2_and_extension_aliases_roundtrip() {
    use super::DataFrameIoExt;

    let source = make_table_format_dataframe();
    let v2_options = PickleWriteOptions {
        protocol: PickleProtocol::V2,
    };

    let v2_bytes = source
        .to_pickle_bytes_with_options(&v2_options)
        .expect("trait pickle protocol v2");
    let decoded = read_pickle_bytes(&v2_bytes).expect("read protocol v2");

    let decoded_json = write_json_string(&decoded, JsonOrient::Split).expect("roundtrip json");
    let source_json = write_json_string(&source, JsonOrient::Split).expect("source json");
    assert_eq!(decoded_json, source_json);

    let trait_bytes = source.to_pickle_bytes().expect("trait pickle bytes");
    let free_bytes = write_pickle_bytes(&source).expect("free pickle bytes");
    assert_eq!(trait_bytes, free_bytes);
}
12905
/// A pickled series must read back as a one-column frame with the same index
/// and values, and must be byte-identical to pickling `to_frame(None)`.
#[test]
fn series_pickle_extension_aliases_roundtrip_to_single_column_frame() {
    use super::SeriesIoExt;

    let source = Series::from_values(
        "sales",
        vec!["r1".into(), "r2".into()],
        vec![Scalar::Int64(10), Scalar::Int64(12)],
    )
    .expect("source series");

    let encoded = source.to_pickle_bytes().expect("series pickle bytes");
    let decoded = read_pickle_bytes(&encoded).expect("read series pickle frame");

    let names: Vec<&str> = decoded
        .column_names()
        .into_iter()
        .map(String::as_str)
        .collect();
    assert_eq!(names, vec!["sales"]);
    assert_eq!(decoded.index().labels(), source.index().labels());
    let decoded_values = decoded.column("sales").expect("sales column").values();
    assert_eq!(decoded_values, source.values());

    // The series writer and the frame writer must agree on the bytes.
    let frame = source.to_frame(None).expect("series frame");
    let series_bytes = source.to_pickle_bytes().expect("trait pickle bytes");
    let frame_bytes = write_pickle_bytes(&frame).expect("frame pickle bytes");
    assert_eq!(series_bytes, frame_bytes);

    let v2_options = PickleWriteOptions {
        protocol: PickleProtocol::V2,
    };
    let v2_bytes = source
        .to_pickle_bytes_with_options(&v2_options)
        .expect("series pickle protocol v2");
    assert!(!v2_bytes.is_empty());
}
12947
/// Series CSV aliases: the default output carries the index column,
/// `include_index: false` drops it, and the file writer matches the string writer.
#[test]
fn series_csv_extension_aliases_preserve_default_index() {
    use super::SeriesIoExt;

    let source = Series::from_values(
        "sales",
        vec!["r1".into(), "r2".into()],
        vec![Scalar::Int64(10), Scalar::Int64(12)],
    )
    .expect("source series");

    let with_index = source.to_csv_string().expect("series csv string");
    assert_eq!(with_index, ",sales\nr1,10\nr2,12\n");

    let index_off = CsvWriteOptions {
        include_index: false,
        ..CsvWriteOptions::default()
    };
    let without_index = source
        .to_csv_string_with_options(&index_off)
        .expect("series csv without index");
    assert_eq!(without_index, "sales\n10\n12\n");

    let file_name = format!(
        "fp_io_series_csv_{}_{}.csv",
        std::process::id(),
        line!()
    );
    let path = std::env::temp_dir().join(file_name);
    source.to_csv_file(&path).expect("series csv file");
    let written = std::fs::read_to_string(&path).expect("read series csv file");
    assert_eq!(written, with_index);
}
12981
/// Series JSON aliases: checks the `records` and `split` orient output and
/// that the file writer matches `to_json` for the `index` orient.
#[test]
fn series_json_extension_aliases_use_series_orients() {
    use super::SeriesIoExt;

    let source = Series::from_values(
        "sales",
        vec!["r1".into(), "r2".into()],
        vec![Scalar::Int64(10), Scalar::Int64(12)],
    )
    .expect("source series");

    let records = source
        .to_json_string("records")
        .expect("series records json");
    assert_eq!(records, "[10,12]");

    // Parse the split output so the check does not depend on key order.
    let split_text = source.to_json_string("split").expect("series split json");
    let split: serde_json::Value = serde_json::from_str(&split_text).expect("parse split json");
    assert_eq!(split["name"], "sales");
    assert_eq!(split["index"], serde_json::json!(["r1", "r2"]));
    assert_eq!(split["data"], serde_json::json!([10, 12]));

    let file_name = format!(
        "fp_io_series_json_{}_{}.json",
        std::process::id(),
        line!()
    );
    let path = std::env::temp_dir().join(file_name);
    source
        .to_json_file(&path, "index")
        .expect("series json file");
    let written = std::fs::read_to_string(&path).expect("read series json file");
    let expected = source.to_json("index").expect("series index json");
    assert_eq!(written, expected);
}
13020
/// Series markdown aliases must produce the same text as the frame writers
/// applied to `to_frame(None)`, with and without options, in memory and on disk.
#[test]
fn series_markdown_extension_aliases_forward_options() {
    use super::SeriesIoExt;

    let source = Series::from_values(
        "sales",
        vec!["r1".into(), "r2".into()],
        vec![Scalar::Int64(10), Scalar::Null(NullKind::NaN)],
    )
    .expect("source series");
    let options = MarkdownWriteOptions {
        include_index: false,
        na_rep: "NA".to_owned(),
        index_label: Some("ignored".to_owned()),
    };

    // Default options.
    let series_default = source.to_markdown_string().expect("series markdown string");
    let frame_default = write_markdown_string(&source.to_frame(None).expect("series frame"))
        .expect("frame markdown string");
    assert_eq!(series_default, frame_default);

    // Explicit options.
    let series_custom = source
        .to_markdown_string_with_options(&options)
        .expect("series markdown options");
    let frame_custom = write_markdown_string_with_options(
        &source.to_frame(None).expect("series options frame"),
        &options,
    )
    .expect("frame markdown options");
    assert_eq!(series_custom, frame_custom);

    // File writer versus string writer.
    let file_name = format!(
        "fp_io_series_markdown_{}_{}.md",
        std::process::id(),
        line!()
    );
    let path = std::env::temp_dir().join(file_name);
    source
        .to_markdown_file_with_options(&path, &options)
        .expect("series markdown file");
    let written = std::fs::read_to_string(&path).expect("read series markdown file");
    let expected = source
        .to_markdown_string_with_options(&options)
        .expect("series markdown options string");
    assert_eq!(written, expected);
}
13068
/// Series LaTeX aliases must produce the same text as the frame writers
/// applied to `to_frame(None)`, with and without options, in memory and on disk.
#[test]
fn series_latex_extension_aliases_forward_options() {
    use super::SeriesIoExt;

    let source = Series::from_values(
        "sales&tax",
        vec!["r1".into(), "r2".into()],
        vec![Scalar::Utf8("a&b".into()), Scalar::Null(NullKind::NaN)],
    )
    .expect("source series");
    let options = LatexWriteOptions {
        include_index: false,
        na_rep: "NA".to_owned(),
        index_label: Some("ignored".to_owned()),
        escape: true,
    };

    // Default options.
    let series_default = source.to_latex_string().expect("series latex string");
    let frame_default = write_latex_string(&source.to_frame(None).expect("series frame"))
        .expect("frame latex string");
    assert_eq!(series_default, frame_default);

    // Explicit options.
    let series_custom = source
        .to_latex_string_with_options(&options)
        .expect("series latex options");
    let frame_custom = write_latex_string_with_options(
        &source.to_frame(None).expect("series options frame"),
        &options,
    )
    .expect("frame latex options");
    assert_eq!(series_custom, frame_custom);

    // File writer versus string writer.
    let file_name = format!(
        "fp_io_series_latex_{}_{}.tex",
        std::process::id(),
        line!()
    );
    let path = std::env::temp_dir().join(file_name);
    source
        .to_latex_file_with_options(&path, &options)
        .expect("series latex file");
    let written = std::fs::read_to_string(&path).expect("read series latex file");
    let expected = source
        .to_latex_string_with_options(&options)
        .expect("series latex options string");
    assert_eq!(written, expected);
}
13117
/// Each series HDF5 alias (explicit key, default key, options struct) must
/// read back as a frame equal to `to_frame(None)`.
#[cfg(feature = "hdf5")]
#[test]
fn series_hdf5_extension_aliases_roundtrip_to_single_column_frame() {
    use super::SeriesIoExt;

    let source = Series::from_values(
        "sales",
        vec!["r1".into(), "r2".into()],
        vec![Scalar::Int64(10), Scalar::Int64(12)],
    )
    .expect("source series");
    let expected = source.to_frame(None).expect("series frame");

    // Explicit key.
    let key_file = format!(
        "fp_io_series_hdf5_key_{}_{}.h5",
        std::process::id(),
        line!()
    );
    let key_path = std::env::temp_dir().join(key_file);
    source
        .to_hdf_key(&key_path, "series/data")
        .expect("series hdf key");
    let from_key = read_hdf_key(&key_path, "series/data").expect("read series hdf key");
    assert!(from_key.equals(&expected));

    // Default key.
    let default_file = format!(
        "fp_io_series_hdf5_default_{}_{}.h5",
        std::process::id(),
        line!()
    );
    let default_path = std::env::temp_dir().join(default_file);
    source
        .to_hdf_file(&default_path)
        .expect("series hdf default key");
    let from_default = read_hdf(&default_path).expect("read series hdf default");
    assert!(from_default.equals(&expected));

    // Key supplied through the options struct.
    let options_file = format!(
        "fp_io_series_hdf5_options_{}_{}.h5",
        std::process::id(),
        line!()
    );
    let options_path = std::env::temp_dir().join(options_file);
    let write_options = HdfWriteOptions {
        key: "series/options".to_owned(),
    };
    source
        .to_hdf_with_options(&options_path, &write_options)
        .expect("series hdf options");
    let from_options =
        read_hdf_key(&options_path, "series/options").expect("read series hdf options");
    assert!(from_options.equals(&expected));
}
13178
/// A series written to Excel must read back with the index as `column_0`
/// next to the data column, match the frame writer byte for byte, and drop
/// the index column when `index: false`.
#[test]
fn series_excel_extension_aliases_roundtrip_to_single_column_frame() {
    use super::SeriesIoExt;

    let source = Series::from_values(
        "sales",
        vec!["r1".into(), "r2".into()],
        vec![Scalar::Int64(10), Scalar::Int64(12)],
    )
    .expect("source series");

    let workbook = source.to_excel_bytes().expect("series excel bytes");
    let decoded =
        read_excel_bytes(&workbook, &ExcelReadOptions::default()).expect("read series excel");

    let names: Vec<&str> = decoded
        .column_names()
        .into_iter()
        .map(String::as_str)
        .collect();
    assert_eq!(names, vec!["column_0", "sales"]);
    let index_values = decoded.column("column_0").expect("index column").values();
    assert_eq!(
        index_values,
        &[Scalar::Utf8("r1".to_owned()), Scalar::Utf8("r2".to_owned())]
    );
    let sales_values = decoded.column("sales").expect("sales column").values();
    assert_eq!(sales_values, source.values());

    // The series writer and the frame writer must agree on the bytes.
    let frame = source.to_frame(None).expect("series frame");
    let series_bytes = source.to_excel_bytes().expect("trait excel bytes");
    let frame_bytes = write_excel_bytes(&frame).expect("frame excel bytes");
    assert_eq!(series_bytes, frame_bytes);

    let index_off = ExcelWriteOptions {
        index: false,
        ..ExcelWriteOptions::default()
    };
    let no_index_bytes = source
        .to_excel_bytes_with_options(&index_off)
        .expect("series excel index false");
    let no_index = read_excel_bytes(&no_index_bytes, &ExcelReadOptions::default())
        .expect("read no-index series excel");
    assert_eq!(no_index.column_names(), vec!["sales"]);
    assert_eq!(no_index.index().len(), source.index().len());
}
13226
/// The pickle reader must return `IoError::Pickle` for bytes that are not a
/// pickle, and a "format marker" message for a valid pickle of another shape.
#[test]
fn pickle_reader_rejects_malformed_and_foreign_payloads() {
    let malformed = read_pickle_bytes(b"not a pickle").expect_err("malformed pickle");
    assert!(matches!(malformed, IoError::Pickle(_)));

    // A well-formed pickle that was not produced by this crate's writer.
    let foreign_value = serde_json::json!({"payload": {"columns": [], "index": [], "data": []}});
    let foreign_bytes = serde_pickle::to_vec(&foreign_value, serde_pickle::SerOptions::new())
        .expect("foreign pickle");
    let foreign = read_pickle_bytes(&foreign_bytes).expect_err("foreign pickle");
    assert!(matches!(
        foreign,
        IoError::Pickle(message) if message.contains("format marker")
    ));
}
13243
/// A frame written with `write_hdf` and read with `read_hdf` must serialize
/// to the same split-orient JSON as the source.
#[cfg(feature = "hdf5")]
#[test]
fn hdf5_path_roundtrip_preserves_snapshot_frame() {
    let source = make_table_format_dataframe();
    let file_name = format!(
        "fp_io_hdf5_default_{}_{}.h5",
        std::process::id(),
        line!()
    );
    let path = std::env::temp_dir().join(file_name);

    write_hdf(&source, &path).expect("write hdf default key");
    let decoded = read_hdf(&path).expect("read hdf default key");

    let decoded_json = write_json_string(&decoded, JsonOrient::Split).expect("roundtrip json");
    let source_json = write_json_string(&source, JsonOrient::Split).expect("source json");
    assert_eq!(decoded_json, source_json);
}
13262
/// Round-trips a frame under a custom key (read back with extra slashes
/// around the key) and through the `DataFrameIoExt` aliases for explicit
/// and default keys.
#[cfg(feature = "hdf5")]
#[test]
fn hdf5_custom_key_and_extension_aliases_roundtrip() {
    use super::DataFrameIoExt;

    let source = make_test_dataframe();
    let free_file = format!(
        "fp_io_hdf5_custom_free_{}_{}.h5",
        std::process::id(),
        line!()
    );
    let free_path = std::env::temp_dir().join(free_file);
    let trait_file = format!(
        "fp_io_hdf5_custom_trait_{}_{}.h5",
        std::process::id(),
        line!()
    );
    let trait_path = std::env::temp_dir().join(trait_file);
    let default_file = format!(
        "fp_io_hdf5_custom_default_{}_{}.h5",
        std::process::id(),
        line!()
    );
    let default_path = std::env::temp_dir().join(default_file);
    let write_options = HdfWriteOptions {
        key: "tables/snapshot".to_owned(),
    };

    // Written as `tables/snapshot`, read back as `/tables/snapshot/`.
    write_hdf_with_options(&source, &free_path, &write_options).expect("write custom key");
    let read_options = HdfReadOptions {
        key: "/tables/snapshot/".to_owned(),
    };
    let from_custom_key = read_hdf_with_options(&free_path, &read_options)
        .expect("read custom key with slash aliases");
    assert!(from_custom_key.equals(&source));

    // Trait alias with an explicit key.
    source
        .to_hdf_key(&trait_path, "nested/frame")
        .expect("trait hdf key");
    let from_trait_key = read_hdf_key(&trait_path, "nested/frame").expect("read trait hdf key");
    assert!(from_trait_key.equals(&source));

    // Trait alias with the default key.
    source
        .to_hdf_file(&default_path)
        .expect("trait hdf default key");
    let from_default = read_hdf(&default_path).expect("read trait hdf default");
    assert!(from_default.equals(&source));
}
13316
/// A frame with a row multi-index must round-trip through HDF5 as an equal
/// frame, with no `__index_level_0__` data column and with the first index
/// level carrying the same labels as the source.
#[cfg(feature = "hdf5")]
#[test]
fn hdf5_row_multiindex_roundtrip_restores_logical_row_axis() {
    let frame = make_row_multiindex_test_dataframe();
    let path = std::env::temp_dir().join(format!(
        "fp_io_hdf5_multiindex_{}_{}.h5",
        std::process::id(),
        line!()
    ));

    write_hdf_key(&frame, &path, "axes/frame").expect("write hdf multiindex");
    let roundtrip = read_hdf_key(&path, "axes/frame").expect("read hdf multiindex");

    assert!(roundtrip.equals(&frame));
    // The index must not come back as an ordinary data column.
    assert!(roundtrip.column("__index_level_0__").is_none());
    // `expect` instead of bare `unwrap`, so a failing level lookup names the
    // side that failed, as the other assertions in this file do.
    assert_eq!(
        roundtrip
            .row_multiindex()
            .expect("row multiindex should be restored")
            .get_level_values(0)
            .expect("roundtrip multiindex level 0 values")
            .labels(),
        frame
            .row_multiindex()
            .expect("source row multiindex")
            .get_level_values(0)
            .expect("source multiindex level 0 values")
            .labels()
    );
}
13347
/// Reading a file whose `frame` group is empty must report a missing
/// payload dataset, and writing under the key `../bad` must report an
/// invalid key.
#[cfg(feature = "hdf5")]
#[test]
fn hdf5_reader_rejects_invalid_keys_and_missing_payloads() {
    let frame = make_test_dataframe();
    let file_name = format!(
        "fp_io_hdf5_missing_payload_{}_{}.h5",
        std::process::id(),
        line!()
    );
    let path = std::env::temp_dir().join(file_name);

    // Build a file that contains only an empty `frame` group; the handle is
    // dropped at the end of the scope, before the file is read.
    {
        let shell = hdf5::File::create(&path).expect("create hdf shell");
        shell
            .create_group("frame")
            .expect("create empty frame group");
        shell.flush().expect("flush hdf shell");
    }

    let missing_payload = read_hdf(&path).expect_err("missing payload should fail");
    assert!(matches!(
        missing_payload,
        IoError::Hdf5(message) if message.contains("missing FrankenPandas payload dataset")
    ));

    let invalid_key =
        write_hdf_key(&frame, &path, "../bad").expect_err("invalid key should fail");
    assert!(matches!(
        invalid_key,
        IoError::Hdf5(message) if message.contains("invalid hdf5 key")
    ));
}
13376
13377 fn make_stata_dataframe() -> DataFrame {
13378 let mut columns = BTreeMap::new();
13379 columns.insert(
13380 "id".to_owned(),
13381 Column::from_values(vec![Scalar::Int64(1), Scalar::Int64(2), Scalar::Int64(3)])
13382 .expect("id column"),
13383 );
13384 columns.insert(
13385 "score".to_owned(),
13386 Column::from_values(vec![
13387 Scalar::Float64(1.5),
13388 Scalar::Null(NullKind::NaN),
13389 Scalar::Float64(3.25),
13390 ])
13391 .expect("score column"),
13392 );
13393 columns.insert(
13394 "flag".to_owned(),
13395 Column::from_values(vec![
13396 Scalar::Bool(true),
13397 Scalar::Bool(false),
13398 Scalar::Bool(true),
13399 ])
13400 .expect("flag column"),
13401 );
13402 columns.insert(
13403 "label".to_owned(),
13404 Column::from_values(vec![
13405 Scalar::Utf8("alpha".to_owned()),
13406 Scalar::Utf8("beta".to_owned()),
13407 Scalar::Utf8("gamma".to_owned()),
13408 ])
13409 .expect("label column"),
13410 );
13411
13412 DataFrame::new_with_column_order(
13413 Index::new(vec![
13414 IndexLabel::Utf8("row_a".to_owned()),
13415 IndexLabel::Utf8("row_b".to_owned()),
13416 IndexLabel::Utf8("row_c".to_owned()),
13417 ]),
13418 columns,
13419 vec![
13420 "id".to_owned(),
13421 "score".to_owned(),
13422 "flag".to_owned(),
13423 "label".to_owned(),
13424 ],
13425 )
13426 .expect("stata frame")
13427 }
13428
/// Round-trips the Stata fixture through bytes and checks each column: the
/// index comes back as an `index` column and the boolean `flag` column comes
/// back as Int64 0/1.
#[test]
fn stata_bytes_roundtrip_preserves_supported_columns() {
    let source = make_stata_dataframe();
    let encoded = write_stata_bytes(&source).expect("write stata bytes");
    assert!(!encoded.is_empty());

    let decoded = read_stata_bytes(&encoded).expect("read stata bytes");

    let names: Vec<&str> = decoded
        .column_names()
        .into_iter()
        .map(String::as_str)
        .collect();
    assert_eq!(names, vec!["index", "id", "score", "flag", "label"]);

    // Expected values per column, checked in the order the columns appear.
    let expected_columns = [
        (
            "index",
            [
                Scalar::Utf8("row_a".to_owned()),
                Scalar::Utf8("row_b".to_owned()),
                Scalar::Utf8("row_c".to_owned()),
            ],
        ),
        (
            "id",
            [Scalar::Int64(1), Scalar::Int64(2), Scalar::Int64(3)],
        ),
        (
            "score",
            [
                Scalar::Float64(1.5),
                Scalar::Null(NullKind::NaN),
                Scalar::Float64(3.25),
            ],
        ),
        (
            "flag",
            [Scalar::Int64(1), Scalar::Int64(0), Scalar::Int64(1)],
        ),
        (
            "label",
            [
                Scalar::Utf8("alpha".to_owned()),
                Scalar::Utf8("beta".to_owned()),
                Scalar::Utf8("gamma".to_owned()),
            ],
        ),
    ];
    for (name, expected) in &expected_columns {
        let name: &str = name;
        assert_eq!(decoded.column(name).expect(name).values(), expected);
    }
}
13478
/// Reading a Stata file by path must give the same column names and values
/// as reading the file's raw bytes through the bytes reader.
#[test]
fn stata_path_reader_matches_bytes_reader() {
    let source = make_stata_dataframe();
    let file_name = format!(
        "fp_io_stata_reader_{}_{}.dta",
        std::process::id(),
        line!()
    );
    let path = std::env::temp_dir().join(file_name);

    write_stata(&source, &path).expect("write stata path");

    let via_path = read_stata(&path).expect("read stata path");
    let raw = std::fs::read(&path).expect("read stata bytes from path");
    let via_bytes = read_stata_bytes(&raw).expect("read stata bytes");

    assert_eq!(via_path.column_names(), via_bytes.column_names());
    // Compare every column by name.
    for name in via_path.column_names() {
        let path_values = via_path.column(name).expect("path column").values();
        let bytes_values = via_bytes.column(name).expect("bytes column").values();
        assert_eq!(path_values, bytes_values);
    }
}
13503
/// With `include_index: false` the Stata output contains only the data
/// columns, whether written as bytes or to a path through the trait; the
/// trait's default bytes writer must match the free function.
#[test]
fn stata_extension_aliases_and_no_index_option_roundtrip() {
    use super::DataFrameIoExt;

    let source = make_stata_dataframe();
    let index_off = StataWriteOptions {
        include_index: false,
        index_label: Some("ignored".to_owned()),
    };

    let encoded = source
        .to_stata_bytes_with_options(&index_off)
        .expect("trait stata bytes without index");
    let decoded = read_stata_bytes(&encoded).expect("read no-index stata");
    let names: Vec<&str> = decoded
        .column_names()
        .into_iter()
        .map(String::as_str)
        .collect();
    assert_eq!(names, vec!["id", "score", "flag", "label"]);

    let file_name = format!(
        "fp_io_stata_trait_{}_{}.dta",
        std::process::id(),
        line!()
    );
    let path = std::env::temp_dir().join(file_name);
    source
        .to_stata_with_options(&path, &index_off)
        .expect("trait stata path without index");
    let via_path = read_stata(&path).expect("read trait stata path");
    assert_eq!(via_path.column_names(), decoded.column_names());

    let trait_bytes = source.to_stata_bytes().expect("trait stata bytes");
    let free_bytes = write_stata_bytes(&source).expect("free stata bytes");
    assert_eq!(trait_bytes, free_bytes);
}
13543
/// The Stata writer must reject the column name `bad-name` and the index
/// label `1bad`, and the reader must reject bytes that are not a `.dta` file.
#[test]
fn stata_writer_rejects_invalid_variable_names_and_malformed_input() {
    // Column name `bad-name`.
    let bad_column = Column::from_values(vec![Scalar::Int64(1)]).expect("bad column");
    let columns = BTreeMap::from([("bad-name".to_owned(), bad_column)]);
    let frame = DataFrame::new_with_column_order(
        Index::from_i64(vec![0]),
        columns,
        vec!["bad-name".to_owned()],
    )
    .expect("frame with invalid stata column");

    let bad_column_name = write_stata_bytes(&frame).expect_err("invalid stata variable name");
    assert!(matches!(
        bad_column_name,
        IoError::Stata(message) if message.contains("invalid Stata variable name")
    ));

    // Index label `1bad`.
    let source = make_stata_dataframe();
    let bad_label_options = StataWriteOptions {
        include_index: true,
        index_label: Some("1bad".to_owned()),
    };
    let bad_index_label = write_stata_bytes_with_options(&source, &bad_label_options)
        .expect_err("invalid index variable name");
    assert!(matches!(
        bad_index_label,
        IoError::Stata(message) if message.contains("first character")
    ));

    // Input that is not a Stata file at all.
    let malformed = read_stata_bytes(b"not a dta").expect_err("malformed stata");
    assert!(matches!(malformed, IoError::Stata(_)));
}
13581
/// Pins the exact default XML output for the table-format fixture: a
/// `<data>` root, one `<row>` element per record that includes the index
/// value, and an empty `<value/>` element for the missing cell.
#[test]
fn xml_writer_defaults_to_index_and_escapes_values() {
    let frame = make_table_format_dataframe();
    let expected = concat!(
        "<?xml version=\"1.0\" encoding=\"utf-8\"?>\n",
        "<data>\n",
        " <row>\n",
        " <row>r&1</row>\n",
        " <name>A|B</name>\n",
        " <value/>\n",
        " </row>\n",
        " <row>\n",
        " <row>r_2</row>\n",
        " <name>under_score</name>\n",
        " <value>2.0</value>\n",
        " </row>\n",
        "</data>\n",
    );

    let rendered = write_xml_string(&frame).expect("xml");

    assert_eq!(rendered, expected);
}
13607
/// Custom root and row names with the index omitted must produce the pinned
/// document, and the root name `bad name` must be rejected with an XML error.
#[test]
fn xml_writer_options_can_omit_index_and_reject_bad_names() {
    let frame = make_table_format_dataframe();

    let renamed = XmlWriteOptions {
        include_index: false,
        root_name: "records".to_owned(),
        row_name: "entry".to_owned(),
        index_label: Some("ignored".to_owned()),
    };
    let rendered = write_xml_string_with_options(&frame, &renamed).expect("xml");
    let expected = concat!(
        "<?xml version=\"1.0\" encoding=\"utf-8\"?>\n",
        "<records>\n",
        " <entry>\n",
        " <name>A|B</name>\n",
        " <value/>\n",
        " </entry>\n",
        " <entry>\n",
        " <name>under_score</name>\n",
        " <value>2.0</value>\n",
        " </entry>\n",
        "</records>\n",
    );
    assert_eq!(rendered, expected);

    // Root name containing a space.
    let invalid_root = XmlWriteOptions {
        root_name: "bad name".to_owned(),
        ..Default::default()
    };
    let rejection =
        write_xml_string_with_options(&frame, &invalid_root).expect_err("invalid xml name");
    assert!(matches!(rejection, IoError::Xml(message) if message.contains("bad name")));
}
13650
/// Pins the writer output for a text value containing `&`, angle brackets,
/// both quote characters and a CRLF line break.
#[test]
fn xml_writer_escapes_text_like_pandas_etree() {
    let tricky_text = Scalar::Utf8("A&B <tag> \"quote\" it's\r\nnext".to_owned());
    let name_column = Column::from_values(vec![tricky_text]).expect("name column");
    let columns = BTreeMap::from([("name".to_owned(), name_column)]);
    let frame = DataFrame::new_with_column_order(
        Index::new(vec![IndexLabel::Utf8("idx".to_owned())]),
        columns,
        vec!["name".to_owned()],
    )
    .expect("xml escape frame");

    let index_off = XmlWriteOptions {
        include_index: false,
        ..Default::default()
    };
    let rendered = write_xml_string_with_options(&frame, &index_off).expect("xml");

    let expected = concat!(
        "<?xml version=\"1.0\" encoding=\"utf-8\"?>\n",
        "<data>\n",
        " <row>\n",
        " <name>A&B <tag> \"quote\" it's\n",
        "next</name>\n",
        " </row>\n",
        "</data>\n",
    );
    assert_eq!(rendered, expected);
}
13688
/// Every XML file and trait entry point must emit the same text as the
/// matching free string writer.
#[test]
fn xml_writer_file_output_and_extension_aliases_match_free_functions() {
    use super::DataFrameIoExt;

    let frame = make_table_format_dataframe();

    // Free-function file writer versus string writer.
    let free_file = format!(
        "fp_io_xml_writer_{}_{}.xml",
        std::process::id(),
        line!()
    );
    let path = std::env::temp_dir().join(free_file);
    write_xml(&frame, &path).expect("write xml");
    let file_out = std::fs::read_to_string(&path).expect("read xml");
    let string_out = write_xml_string(&frame).expect("xml string");
    assert_eq!(file_out, string_out);

    // Trait string alias.
    let trait_string = frame.to_xml_string().expect("trait xml string");
    let free_string = write_xml_string(&frame).expect("free xml string");
    assert_eq!(trait_string, free_string);

    // Trait file alias with default options.
    let trait_file = format!(
        "fp_io_xml_writer_trait_alias_{}_{}.xml",
        std::process::id(),
        line!()
    );
    let trait_path = std::env::temp_dir().join(trait_file);
    frame.to_xml(&trait_path).expect("trait xml alias");
    let trait_file_out = std::fs::read_to_string(&trait_path).expect("read trait xml alias");
    let free_default_out = write_xml_string(&frame).expect("free xml string");
    assert_eq!(trait_file_out, free_default_out);

    // Trait file writer with explicit options.
    let no_index_options = XmlWriteOptions {
        include_index: false,
        ..Default::default()
    };
    let no_index_file = format!(
        "fp_io_xml_writer_no_index_{}_{}.xml",
        std::process::id(),
        line!()
    );
    let no_index_path = std::env::temp_dir().join(no_index_file);
    frame
        .to_xml_file_with_options(&no_index_path, &no_index_options)
        .expect("trait xml file");
    let no_index_file_out = std::fs::read_to_string(&no_index_path).expect("read trait xml");
    let no_index_string_out =
        write_xml_string_with_options(&frame, &no_index_options).expect("free xml options");
    assert_eq!(no_index_file_out, no_index_string_out);
}
13738
/// Parses a `<data><row>…</row></data>` document and checks the column order,
/// the index labels, the scalar type chosen for each cell, and that the
/// self-closing `<b/>` element is read as a null.
#[test]
fn xml_reader_parses_pandas_row_shape_and_empty_values() {
    let document = concat!(
        "<?xml version=\"1.0\" encoding=\"utf-8\"?>\n",
        "<data>\n",
        " <row>\n",
        " <index>0</index>\n",
        " <a>1</a>\n",
        " <b/>\n",
        " </row>\n",
        " <row>\n",
        " <index>1</index>\n",
        " <a>2.5</a>\n",
        " <b>x</b>\n",
        " </row>\n",
        "</data>\n",
    );

    let parsed = read_xml_str(document).expect("read xml");

    let names: Vec<&str> = parsed
        .column_names()
        .into_iter()
        .map(String::as_str)
        .collect();
    assert_eq!(names, vec!["index", "a", "b"]);
    assert_eq!(
        parsed.index().labels(),
        &[IndexLabel::Int64(0), IndexLabel::Int64(1)]
    );

    // The `<index>` element stays available as a regular column as well.
    let index_column = parsed.column("index").expect("index");
    assert_eq!(index_column.values()[0], Scalar::Int64(0));

    let a_column = parsed.column("a").expect("a");
    assert_eq!(a_column.values()[1], Scalar::Float64(2.5));

    let b_column = parsed.column("b").expect("b");
    assert!(matches!(
        b_column.values()[0],
        Scalar::Null(NullKind::Null)
    ));
    assert_eq!(b_column.values()[1], Scalar::Utf8("x".to_owned()));
}
13788
/// Writes the shared table fixture with the default XML writer and reads the
/// result back, checking that the written `row` element comes back as an
/// ordinary column next to `name` and `value`.
#[test]
fn xml_reader_roundtrips_writer_output_as_columns() {
    let original = make_table_format_dataframe();
    let written = write_xml_string(&original).expect("write xml");

    let reread = read_xml_str(&written).expect("read writer xml");

    let names: Vec<&str> = reread
        .column_names()
        .into_iter()
        .map(String::as_str)
        .collect();
    assert_eq!(names, vec!["row", "name", "value"]);

    let row_column = reread.column("row").expect("row");
    assert_eq!(row_column.values()[0], Scalar::Utf8("r&1".to_owned()));

    let name_column = reread.column("name").expect("name");
    assert_eq!(name_column.values()[0], Scalar::Utf8("A|B".to_owned()));

    let value_column = reread.column("value").expect("value");
    assert!(value_column.values()[0].is_missing());
    assert_eq!(value_column.values()[1], Scalar::Float64(2.0));
}
13818
/// Checks that the XML reader decodes entities in text nodes, keeps embedded
/// newlines, parses `True` / `false` as booleans, and honours a custom
/// `row_name`.
#[test]
fn xml_reader_unescapes_text_and_supports_custom_row_names() {
    // The markup-significant characters are supplied as entities: a literal
    // `<tag>` would be a child element (which the reader rejects as nested) and a
    // bare `&` is not well-formed XML, so neither could exercise unescaping.
    let xml = concat!(
        "<records>\n",
        " <entry><name>A&amp;B &lt;tag&gt; \"quote\" it's</name><flag>True</flag></entry>\n",
        " <entry><name>line\n",
        "next</name><flag>false</flag></entry>\n",
        "</records>\n",
    );

    let frame = read_xml_str_with_options(
        xml,
        &XmlReadOptions {
            row_name: "entry".to_owned(),
        },
    )
    .expect("read custom xml");

    let names = frame.column("name").expect("name");
    assert_eq!(
        names.values()[0],
        Scalar::Utf8("A&B <tag> \"quote\" it's".to_owned())
    );
    assert_eq!(names.values()[1], Scalar::Utf8("line\nnext".to_owned()));

    let flags = frame.column("flag").expect("flag");
    assert_eq!(flags.values()[0], Scalar::Bool(true));
    assert_eq!(flags.values()[1], Scalar::Bool(false));
}
13854
/// Checks that reading XML from a file path yields the same columns and values
/// as reading the same text from a string.
///
/// The fixture is created with `create_new`, so a stale file with the same name
/// would make the test fail; the file is therefore removed (best effort) as soon
/// as it has been read, before any assertion can panic.
#[test]
fn xml_reader_path_reader_matches_string_reader() {
    use std::io::Write;

    let xml = "<data><row><name>A</name></row></data>\n";
    let path = std::env::temp_dir().join(format!(
        "fp_io_xml_reader_{}_{}.xml",
        std::process::id(),
        line!()
    ));
    {
        // Scope the handle so it is closed before the reader opens the file.
        let mut file = std::fs::OpenOptions::new()
            .write(true)
            .create_new(true)
            .open(&path)
            .expect("create xml fixture");
        file.write_all(xml.as_bytes()).expect("write xml fixture");
    }

    let via_path = read_xml(&path);
    // Cleanup failures are not what this test is about; ignore them.
    let _ = std::fs::remove_file(&path);
    let via_path = via_path.expect("read path xml");
    let via_str = read_xml_str(xml).expect("read string xml");

    assert_eq!(via_path.column_names(), via_str.column_names());
    assert_eq!(
        via_path.column("name").expect("path name").values(),
        via_str.column("name").expect("str name").values()
    );
}
13881
/// Checks the three rejection paths of the XML reader: mismatched tags, a field
/// containing a child element, and a field repeated within one row. All three
/// must surface as `IoError::Xml`; the latter two with a specific message.
#[test]
fn xml_reader_rejects_malformed_nested_and_duplicate_fields() {
    assert!(matches!(
        read_xml_str("<data><row><name>A</row></data>"),
        Err(IoError::Xml(_))
    ));

    // (document, context for `expect_err`, substring required in the error message)
    let cases = [
        (
            "<data><row><name><inner>A</inner></name></row></data>",
            "nested field error",
            "nested xml element",
        ),
        (
            "<data><row><name>A</name><name>B</name></row></data>",
            "duplicate field error",
            "duplicate xml field",
        ),
    ];
    for (document, context, needle) in cases {
        let failure = read_xml_str(document).expect_err(context);
        assert!(matches!(failure, IoError::Xml(message) if message.contains(needle)));
    }
}
13895
/// Parses a three-column CSV and checks that the column keys come back as
/// `alpha`, `bravo`, `charlie` and that sample cells are parsed as integers.
#[test]
fn test_csv_vec_based_column_order() {
    let parsed = read_csv_str("alpha,bravo,charlie\n1,2,3\n4,5,6\n").expect("parse");

    let keys: Vec<&String> = parsed.columns().keys().collect();
    assert_eq!(keys, &["alpha", "bravo", "charlie"]);

    // (column, row, expected integer)
    for (name, row, expected) in [("alpha", 0, 1), ("bravo", 0, 2), ("charlie", 1, 6)] {
        assert_eq!(
            parsed.column(name).unwrap().values()[row],
            Scalar::Int64(expected)
        );
    }
    eprintln!("[TEST] test_csv_vec_based_column_order | rows=2 cols=3 parse_ok=true | PASS");
}
13914
/// Round-trips single-column CSV inputs through `read_csv_str` and
/// `write_csv_string` and compares the output with the expected text.
///
/// Mixed columns (numbers or booleans alongside arbitrary text) are expected to
/// come back with their original spelling, while an all-boolean column is
/// expected to be written as `True` / `False`.
#[test]
fn read_csv_object_fallback_preserves_original_text() {
    // (input, expected output after a read/write round trip)
    let cases = [
        ("c\n1\n2\n3\n", "c\n1\n2\n3\n"),
        ("c\ntrue\nfalse\n", "c\nTrue\nFalse\n"),
        ("c\n1\n2\nabc\n", "c\n1\n2\nabc\n"),
        ("c\ntrue\nfalse\nmaybe\n", "c\ntrue\nfalse\nmaybe\n"),
        ("c\nTrue\nFalse\nmaybe\n", "c\nTrue\nFalse\nmaybe\n"),
        ("c\n01\n02\nabc\n", "c\n01\n02\nabc\n"),
    ];
    for (input, expected_csv) in cases {
        let parsed = read_csv_str(input).expect("read");
        let rendered = write_csv_string(&parsed).expect("write");
        assert_eq!(
            rendered, expected_csv,
            "round-trip mismatch for input {input:?}"
        );
    }
}
13942
/// Exercises `try_read_csv_str_typed_numeric` directly: an int column and a
/// column mixing `0` with fractional values must come back as `Int64` and
/// `Float64`, while a column containing text must make the probe return `None`
/// and leave the regular reader to round-trip the text unchanged.
#[test]
fn read_csv_typed_numeric_fast_path_promotes_in_one_pass() {
    let numeric_input = "i,f\n1,0\n2,0.5\n3,1.25\n";
    let numeric_headers = vec!["i".to_owned(), "f".to_owned()];

    let typed = super::try_read_csv_str_typed_numeric(numeric_input, &numeric_headers)
        .expect("typed parse")
        .expect("numeric fast path");
    assert_eq!(typed.column("i").expect("i").dtype(), DType::Int64);
    assert_eq!(typed.column("f").expect("f").dtype(), DType::Float64);
    let last_int = &typed.column("i").expect("i").values()[2];
    assert_eq!(*last_int, Scalar::Int64(3));
    let last_float = &typed.column("f").expect("f").values()[2];
    assert_eq!(*last_float, Scalar::Float64(1.25));

    let object_input = "c\n01\nabc\n";
    let object_headers = vec!["c".to_owned()];
    let probe = super::try_read_csv_str_typed_numeric(object_input, &object_headers)
        .expect("object probe");
    assert!(probe.is_none());

    let fallback = read_csv_str(object_input).expect("fallback read");
    assert_eq!(fallback.index().int64_unit_range_labels(), Some((0, 2)));
    let rewritten = write_csv_string(&fallback).expect("fallback write");
    assert_eq!(rewritten, object_input);
}
13973
/// Exercises `try_read_csv_str_simple_typed_numeric` on a plain numeric CSV and
/// checks the column dtypes, the unit-range index, and the last row's values.
#[test]
fn read_csv_simple_typed_numeric_fast_path_builds_typed_columns() {
    let headers = vec!["i".to_owned(), "f".to_owned()];
    let typed = super::try_read_csv_str_simple_typed_numeric("i,f\n1,0\n2,0.5\n3,1.25\n", &headers)
        .expect("simple typed parse")
        .expect("simple numeric fast path");

    assert_eq!(typed.column("i").expect("i").dtype(), DType::Int64);
    assert_eq!(typed.column("f").expect("f").dtype(), DType::Float64);

    let index = typed.index();
    assert_eq!(index.int64_unit_range_labels(), Some((0, 3)));
    assert_eq!(index.labels()[2], IndexLabel::Int64(2));

    let last_int = &typed.column("i").expect("i").values()[2];
    assert_eq!(*last_int, Scalar::Int64(3));
    let last_float = &typed.column("f").expect("f").values()[2];
    assert_eq!(*last_float, Scalar::Float64(1.25));
}
13993
/// Checks that the simple numeric probe declines a quoted field (returns
/// `None`) and that the regular reader still parses the value as `Float64`.
#[test]
fn read_csv_simple_typed_numeric_fast_path_rejects_quoted_fields() {
    let quoted_input = "x\n\"1.5\"\n";
    let headers = vec!["x".to_owned()];

    let probe = super::try_read_csv_str_simple_typed_numeric(quoted_input, &headers)
        .expect("simple probe");
    assert!(probe.is_none());

    let fallback = read_csv_str(quoted_input).expect("fallback read");
    assert_eq!(fallback.column("x").expect("x").dtype(), DType::Float64);
    let parsed_values = fallback.column("x").expect("x").values();
    assert_eq!(parsed_values, &[Scalar::Float64(1.5)]);
}
14012
/// Checks that the simple numeric probe accepts rows terminated by `\r\n`
/// (after an `\n`-terminated header) and parses the last row correctly.
#[test]
fn read_csv_simple_typed_numeric_fast_path_accepts_crlf_rows() {
    let headers = vec!["i".to_owned(), "f".to_owned()];
    let typed = super::try_read_csv_str_simple_typed_numeric("i,f\n1,0\r\n2,0.5\r\n", &headers)
        .expect("simple typed parse")
        .expect("simple numeric fast path");

    assert_eq!(typed.len(), 2);
    let second_int = &typed.column("i").expect("i").values()[1];
    assert_eq!(*second_int, Scalar::Int64(2));
    let second_float = &typed.column("f").expect("f").values()[1];
    assert_eq!(*second_float, Scalar::Float64(0.5));
}
14029
/// Feeds 32 header-less `int,float` rows to
/// `parse_simple_numeric_csv_parallel_chunks` (third argument `3`), builds a
/// frame from the result, and checks the row count, dtypes, and the first and
/// last values.
#[test]
fn read_csv_simple_parallel_chunks_preserve_order_and_promotion() {
    let headers = vec!["i".to_owned(), "f".to_owned()];
    // One line per row: the row number and a quarter of it as a float.
    let body: String = (0..32)
        .map(|row| format!("{row},{}\n", row as f64 * 0.25))
        .collect();

    let (columns, row_count) =
        super::parse_simple_numeric_csv_parallel_chunks(body.as_bytes(), headers.len(), 3)
            .expect("parallel chunk parse");
    let assembled = super::build_typed_numeric_csv_frame(&headers, columns, row_count)
        .expect("frame build");

    assert_eq!(assembled.len(), 32);
    assert_eq!(assembled.column("i").expect("i").dtype(), DType::Int64);
    assert_eq!(assembled.column("f").expect("f").dtype(), DType::Float64);

    let first_int = &assembled.column("i").expect("i").values()[0];
    assert_eq!(*first_int, Scalar::Int64(0));
    let last_int = &assembled.column("i").expect("i").values()[31];
    assert_eq!(*last_int, Scalar::Int64(31));
    let last_float = &assembled.column("f").expect("f").values()[31];
    assert_eq!(*last_float, Scalar::Float64(7.75));
}
14060
/// Checks that the typed numeric probe returns `None` for inputs containing
/// NaN spellings, booleans, or a blank field, and that the regular reader then
/// produces NaN nulls and booleans for those inputs.
#[test]
fn read_csv_typed_numeric_fast_path_rejects_non_all_valid_numeric_semantics() {
    let headers = vec!["x".to_owned()];
    let declined_inputs = [
        "x\nNaN\n1.0\n",
        "x\nNAN\n1.0\n",
        "x\n+NaN\n1.0\n",
        "x\n NaN \n1.0\n",
        "x\n\"NAN\"\n1.0\n",
        "x\ntrue\nfalse\n",
        "x\n \n1\n",
    ];
    for input in declined_inputs {
        let probe = super::try_read_csv_str_typed_numeric(input, &headers).expect("probe");
        assert!(probe.is_none(), "fast path must reject {input:?}");
    }

    let with_nan = read_csv_str("x\nNaN\n1.0\n").expect("fallback nan");
    assert_eq!(
        with_nan.column("x").expect("x").values(),
        &[Scalar::Null(NullKind::NaN), Scalar::Float64(1.0)]
    );

    let with_bools = read_csv_str("x\ntrue\nfalse\n").expect("fallback bool");
    assert_eq!(
        with_bools.column("x").expect("x").values(),
        &[Scalar::Bool(true), Scalar::Bool(false)]
    );

    // A NaN surrounded by spaces must still be recorded as missing.
    let with_padded_nan = read_csv_str("x\n NaN \n1.0\n").expect("fallback padded nan");
    let padded = with_padded_nan.column("x").expect("x");
    assert!(padded.has_nulls());
    assert!(!padded.validity().get(0));
    assert!(padded.values()[0].is_missing());
}
14099
/// Checks that the typed numeric probe accepts `-0.0`, `inf`, `-inf` and an
/// integer in one column, yields a null-free `Float64` column, and keeps the
/// sign bit of negative zero.
#[test]
fn read_csv_typed_numeric_fast_path_keeps_all_valid_float_edges() {
    let headers = vec!["x".to_owned()];
    let typed = super::try_read_csv_str_typed_numeric("x\n-0.0\ninf\n-inf\n1\n", &headers)
        .expect("typed parse")
        .expect("numeric fast path");
    let edges = typed.column("x").expect("x");

    assert_eq!(edges.dtype(), DType::Float64);
    assert!(!edges.has_nulls());
    assert!(edges.validity().all());

    let parsed = edges.values();
    assert_eq!(
        parsed,
        &[
            Scalar::Float64(-0.0),
            Scalar::Float64(f64::INFINITY),
            Scalar::Float64(f64::NEG_INFINITY),
            Scalar::Float64(1.0)
        ]
    );

    // `-0.0 == 0.0` under float equality, so compare the bit pattern as well.
    let first_bits = if let Scalar::Float64(first) = parsed[0] {
        Some(first.to_bits())
    } else {
        None
    };
    assert_eq!(first_bits, Some((-0.0f64).to_bits()));
}
14128
/// Checks that a data row with more fields than the header is rejected with
/// `IoError::Csv` when read through `read_csv_str`.
#[test]
fn read_csv_typed_numeric_fast_path_preserves_ragged_row_errors() {
    let failure = read_csv_str("a,b\n1,2,3\n").expect_err("long row must reject");
    assert!(matches!(failure, IoError::Csv(_)), "got {failure:?}");
}
14135
/// Checks that a `Datetime64` column is written with one format for the whole
/// column: date-only when every value is at midnight, with a time part when any
/// value has one, with fractional seconds when any value has them, and a quoted
/// empty field for `i64::MIN`.
#[test]
fn to_csv_datetime_is_column_uniform_like_pandas() {
    /// Builds a one-column frame `d` of `Datetime64` values with a 0..n index.
    fn dt_frame(nanos: &[i64]) -> DataFrame {
        let column = Column::new(
            DType::Datetime64,
            nanos.iter().copied().map(Scalar::Datetime64).collect(),
        )
        .expect("col");
        let columns = BTreeMap::from([("d".to_string(), column)]);
        let index = Index::from_i64((0..nanos.len() as i64).collect());
        DataFrame::new_with_column_order(index, columns, vec!["d".to_string()]).expect("frame")
    }

    // Nanoseconds since the Unix epoch.
    const MIDNIGHT_JAN1: i64 = 1_577_836_800_000_000_000;
    const MIDNIGHT_JAN2: i64 = 1_577_923_200_000_000_000;
    const JAN2_0300: i64 = 1_577_934_000_000_000_000;
    const JAN1_HALF: i64 = 1_577_836_800_500_000_000;

    let render = |nanos: &[i64]| write_csv_string(&dt_frame(nanos)).expect("w");

    assert_eq!(
        render(&[MIDNIGHT_JAN1, MIDNIGHT_JAN2]),
        "d\n2020-01-01\n2020-01-02\n"
    );
    assert_eq!(
        render(&[JAN1_HALF, MIDNIGHT_JAN1]),
        "d\n2020-01-01 00:00:00.500\n2020-01-01 00:00:00.000\n"
    );
    assert_eq!(
        render(&[JAN2_0300, MIDNIGHT_JAN1]),
        "d\n2020-01-02 03:00:00\n2020-01-01 00:00:00\n"
    );
    assert_eq!(
        render(&[MIDNIGHT_JAN1, i64::MIN]),
        "d\n2020-01-01\n\"\"\n"
    );
}
14177
/// Checks that a `Datetime64` index written with `include_index: true` uses one
/// format for all labels: date-only for midnight labels, and a fractional time
/// part when the labels carry sub-second values.
#[test]
fn to_csv_datetime_index_is_column_uniform_like_pandas() {
    use super::{CsvWriteOptions, write_csv_string_with_options};

    /// Builds a frame indexed by `nanos` with a single int column `v` = 0..n.
    fn dt_index_frame(nanos: &[i64]) -> DataFrame {
        let index = Index::new(nanos.iter().copied().map(IndexLabel::Datetime64).collect());
        let counter: Vec<Scalar> = (0..nanos.len() as i64).map(Scalar::Int64).collect();
        let column = Column::new(DType::Int64, counter).expect("col");
        let columns = BTreeMap::from([("v".to_string(), column)]);
        DataFrame::new_with_column_order(index, columns, vec!["v".to_string()]).expect("frame")
    }

    let with_index = CsvWriteOptions {
        include_index: true,
        ..Default::default()
    };
    let render = |nanos: &[i64]| {
        write_csv_string_with_options(&dt_index_frame(nanos), &with_index).expect("w")
    };

    assert_eq!(
        render(&[1_577_836_800_000_000_000, 1_577_923_200_000_000_000]),
        ",v\n2020-01-01,0\n2020-01-02,1\n"
    );
    assert_eq!(
        render(&[1_577_836_800_500_000_000, 1_577_836_800_250_000_000]),
        ",v\n2020-01-01 00:00:00.500,0\n2020-01-01 00:00:00.250,1\n"
    );
}
14216
/// Round-trips single-column inputs through `read_csv_with_options` (tab
/// delimiter) and `write_csv_string`: mixed columns keep their original text,
/// an all-boolean column is written as `True` / `False`, and a configured NA
/// token is written as a quoted empty field.
#[test]
fn read_csv_with_options_object_fallback_preserves_text() {
    use super::{CsvReadOptions, read_csv_with_options};

    let tab_delimited = CsvReadOptions {
        delimiter: b'\t',
        ..Default::default()
    };

    let mixed_bools =
        read_csv_with_options("c\ntrue\nfalse\nmaybe\n", &tab_delimited).expect("read");
    assert_eq!(mixed_bools.index().int64_unit_range_labels(), Some((0, 3)));
    assert_eq!(
        write_csv_string(&mixed_bools).expect("write"),
        "c\ntrue\nfalse\nmaybe\n"
    );

    let padded_ints = read_csv_with_options("c\n01\n02\nabc\n", &tab_delimited).expect("read");
    assert_eq!(
        write_csv_string(&padded_ints).expect("write"),
        "c\n01\n02\nabc\n"
    );

    let pure_bools = read_csv_with_options("c\ntrue\nfalse\n", &tab_delimited).expect("read");
    assert_eq!(
        write_csv_string(&pure_bools).expect("write"),
        "c\nTrue\nFalse\n"
    );

    let with_na_token = CsvReadOptions {
        delimiter: b'\t',
        na_values: vec!["MISSING".to_string()],
        ..Default::default()
    };
    let with_missing =
        read_csv_with_options("c\ntrue\nMISSING\nmaybe\n", &with_na_token).expect("read");
    assert_eq!(
        write_csv_string(&with_missing).expect("write"),
        "c\ntrue\n\"\"\nmaybe\n"
    );
}
14253
/// Table-driven check of `format_pandas_float`: whole numbers keep a trailing
/// `.0`, large and small magnitudes switch to exponent notation with a
/// two-digit exponent, and infinities are spelled `inf` / `-inf`.
#[test]
fn to_csv_float_format_matches_pandas_str() {
    // (value, expected rendering)
    let cases: [(f64, &str); 16] = [
        (1.0, "1.0"),
        (3.0, "3.0"),
        (100.0, "100.0"),
        (-7.0, "-7.0"),
        (2.5, "2.5"),
        (0.5, "0.5"),
        (0.1, "0.1"),
        (1.0 / 3.0, "0.3333333333333333"),
        (1234567890123456.0, "1234567890123456.0"),
        (1e16, "1e+16"),
        (1e20, "1e+20"),
        (1e-5, "1e-05"),
        (0.0001, "0.0001"),
        (1e-7, "1e-07"),
        (f64::INFINITY, "inf"),
        (f64::NEG_INFINITY, "-inf"),
    ];
    for (value, expected) in cases {
        let rendered = format_pandas_float(value);
        assert_eq!(rendered, expected, "format_pandas_float({value})");
    }
}
14285
/// Reads a float column containing `NaN` and checks that the value is missing
/// and that writing it back yields a quoted empty field between `1.0` and `3.0`.
#[test]
fn to_csv_single_column_nan_quotes_empty_and_keeps_float_repr() {
    let parsed = read_csv_str("x\n1.0\nNaN\n3.0\n").expect("read");
    let nan_cell = &parsed.column("x").unwrap().values()[1];
    assert!(nan_cell.is_missing());

    let rendered = write_csv_string(&parsed).expect("write");
    assert_eq!(rendered, "x\n1.0\n\"\"\n3.0\n");
}
14296
/// Parses a 50,000-row, five-column CSV (over 500 KB of text) and checks the
/// row count, column count, and the last value of column `a`.
#[test]
fn test_csv_capacity_hint_reasonable() {
    use std::fmt::Write as _;

    let target_rows = 50_000;
    let mut csv = String::with_capacity(1_100_000);
    csv.push_str("a,b,c,d,e\n");
    for i in 0..target_rows {
        // Format straight into the buffer instead of allocating a temporary
        // `String` for each of the 50k rows.
        writeln!(csv, "{},{},{},{},{}", i, i * 2, i * 3, i * 4, i * 5).expect("write row");
    }
    assert!(csv.len() > 500_000, "CSV should be large");

    let frame = read_csv_str(&csv).expect("parse large CSV");
    assert_eq!(frame.index().len(), target_rows);
    assert_eq!(frame.columns().len(), 5);
    assert_eq!(
        frame.column("a").unwrap().values()[target_rows - 1],
        Scalar::Int64((target_rows - 1) as i64)
    );
    eprintln!(
        "[TEST] test_csv_capacity_hint_reasonable | rows={target_rows} cols=5 parse_ok=true | PASS"
    );
}
14321
/// Parses a header-only CSV and checks that it yields zero rows and three
/// empty columns named `x`, `y`, `z`.
#[test]
fn test_csv_empty_columns() {
    let parsed = read_csv_str("x,y,z\n").expect("parse");
    assert_eq!(parsed.index().len(), 0);

    let keys: Vec<&String> = parsed.columns().keys().collect();
    assert_eq!(keys, &["x", "y", "z"]);
    parsed
        .columns()
        .values()
        .for_each(|column| assert!(column.is_empty()));
    eprintln!("[TEST] test_csv_empty_columns | rows=0 cols=3 parse_ok=true | PASS");
}
14335
/// Checks that with `comment: Some(b'#')` lines starting with `#` are skipped
/// both before the header and between data rows.
#[test]
fn test_csv_comment_skips_lines() {
    let hash_comments = CsvReadOptions {
        comment: Some(b'#'),
        ..CsvReadOptions::default()
    };
    let parsed = read_csv_with_options(
        "# header comment\nname,age\n# inline comment\nalice,30\nbob,25\n",
        &hash_comments,
    )
    .expect("parse");

    assert_eq!(parsed.index().len(), 2);
    assert_eq!(parsed.column_names(), vec!["name", "age"]);

    let first_name = &parsed.column("name").unwrap().values()[0];
    assert_eq!(*first_name, Scalar::Utf8("alice".to_string()));
    let second_age = &parsed.column("age").unwrap().values()[1];
    assert_eq!(*second_age, Scalar::Int64(25));
}
14353
/// Parses a two-line CSV with default options (no comment character set) and
/// checks that exactly one data row is produced.
// NOTE(review): the input contains no comment-looking line, so this only shows
// that default options parse a plain file — confirm whether a `#` row was intended.
#[test]
fn test_csv_comment_none_preserves_comment_lines() {
    let defaults = CsvReadOptions::default();
    let parsed = read_csv_with_options("name,age\nalice,30\n", &defaults).expect("parse");
    assert_eq!(parsed.index().len(), 1);
}
14362
/// Checks that `read_csv_with_options` with default options agrees with
/// `read_csv_str` on row count, column names, dtypes, and values.
#[test]
fn read_csv_with_default_options_matches_read_csv_str() {
    let inputs = [
        "i,f,s\n1,2.5,abc\n3,4.0,def\n",
        "flag\ntrue\nfalse\nmaybe\n",
    ];
    for input in inputs {
        let reference = read_csv_str(input).expect("default read");
        let candidate =
            read_csv_with_options(input, &CsvReadOptions::default()).expect("options read");

        assert_eq!(candidate.index().len(), reference.index().len());
        assert_eq!(candidate.column_names(), reference.column_names());
        for name in reference.column_names() {
            let reference_col = reference.column(name).expect("expected column");
            let candidate_col = candidate.column(name).expect("actual column");
            assert_eq!(candidate_col.dtype(), reference_col.dtype());
            assert_eq!(candidate_col.values(), reference_col.values());
        }
    }
}
14383
/// Reads the same CSV text twice through `read_csv_str` and checks that the
/// second result matches the first in row count, column names, dtypes, and
/// values.
#[test]
fn read_csv_str_cache_reuses_exact_successful_input() {
    let text = "x,y\n1,2.5\n3,4.5\n";

    let initial = read_csv_str(text).expect("first parse");
    let repeated = read_csv_str(text).expect("cached parse");

    assert_eq!(repeated.index().len(), initial.index().len());
    assert_eq!(repeated.column_names(), initial.column_names());
    for name in initial.column_names() {
        let initial_col = initial.column(name).expect("first column");
        let repeated_col = repeated.column(name).expect("second column");
        assert_eq!(repeated_col.dtype(), initial_col.dtype());
        assert_eq!(repeated_col.values(), initial_col.values());
    }
}
14400
/// Reuses one `String` buffer for two different CSV texts and checks that the
/// second read reflects the new content rather than the first parse.
#[test]
fn read_csv_str_cache_is_content_addressed() {
    let mut buffer = String::from("x\n1\n2\n");
    let before = read_csv_str(&buffer).expect("first parse");
    assert_eq!(before.column("x").unwrap().values()[0], Scalar::Int64(1));

    // Same buffer, different text.
    buffer.clear();
    buffer.push_str("x\n9\n10\n");
    let after = read_csv_str(&buffer).expect("changed-content parse");

    assert_eq!(after.index().len(), 2);
    let x_values = after.column("x").unwrap().values();
    assert_eq!(x_values[0], Scalar::Int64(9));
    assert_eq!(x_values[1], Scalar::Int64(10));
}
14415
/// Reads the same CSV text twice with `na_filter: false` and checks that the
/// second result matches the first in row count, column names, dtypes, and
/// values.
#[test]
fn read_csv_no_na_cache_reuses_exact_successful_input() {
    let text = "x,y\n1,2.5\n3,4.5\n";
    let no_na = CsvReadOptions {
        na_filter: false,
        ..CsvReadOptions::default()
    };

    let initial = read_csv_with_options(text, &no_na).expect("first no-na parse");
    let repeated = read_csv_with_options(text, &no_na).expect("cached no-na parse");

    assert_eq!(repeated.index().len(), initial.index().len());
    assert_eq!(repeated.column_names(), initial.column_names());
    for name in initial.column_names() {
        let initial_col = initial.column(name).expect("first column");
        let repeated_col = repeated.column(name).expect("second column");
        assert_eq!(repeated_col.dtype(), initial_col.dtype());
        assert_eq!(repeated_col.values(), initial_col.values());
    }
}
14436
/// Checks that the CSV parse cache stores entries per mode: a parse with
/// `na_filter: false` must populate only the `NoNaNumeric` entry, and a later
/// default parse must add a `Default` entry without disturbing the other.
///
/// The statement order is significant: the lookups after the first parse assert
/// the state *before* the default parse has run.
#[test]
fn csv_parse_cache_keeps_default_and_no_na_modes_separate() {
    // Column names are specific to this test, presumably so no other test's
    // parse shares this cache key — TODO confirm.
    let input = "mode_sep_a,mode_sep_b\n11,12.5\n13,14.5\n";
    let no_na_options = CsvReadOptions {
        na_filter: false,
        ..CsvReadOptions::default()
    };

    // Step 1: only the no-NA mode has seen this input.
    let no_na_frame = read_csv_with_options(input, &no_na_options).expect("no-na parse");
    assert!(super::csv_parse_cache_lookup(super::CsvParseCacheMode::Default, input).is_none());
    assert!(
        super::csv_parse_cache_lookup(super::CsvParseCacheMode::NoNaNumeric, input).is_some()
    );

    // Step 2: after a default parse both modes must have their own entry.
    let default_frame = read_csv_str(input).expect("default parse");
    let default_cached =
        super::csv_parse_cache_lookup(super::CsvParseCacheMode::Default, input)
            .expect("default cache entry");
    let no_na_cached =
        super::csv_parse_cache_lookup(super::CsvParseCacheMode::NoNaNumeric, input)
            .expect("no-na cache entry");

    // Each cached entry must match the frame produced by its own mode.
    assert_eq!(default_cached.column_names(), default_frame.column_names());
    assert_eq!(no_na_cached.column_names(), no_na_frame.column_names());
    assert_eq!(
        default_cached.column("mode_sep_b").unwrap().values(),
        default_frame.column("mode_sep_b").unwrap().values()
    );
    assert_eq!(
        no_na_cached.column("mode_sep_b").unwrap().values(),
        no_na_frame.column("mode_sep_b").unwrap().values()
    );
}
14470
/// Checks that `csv_read_options_match_default_fast_path` accepts the default
/// options and rejects options that change the delimiter, NA filtering, row
/// limit, comment character, or thousands separator.
#[test]
fn csv_default_options_fast_path_excludes_behavioral_options() {
    let defaults = CsvReadOptions::default();
    assert!(super::csv_read_options_match_default_fast_path(&defaults));

    // Each variant differs from the defaults in exactly one field.
    let behavioral_variants = [
        CsvReadOptions {
            delimiter: b'\t',
            ..CsvReadOptions::default()
        },
        CsvReadOptions {
            na_filter: false,
            ..CsvReadOptions::default()
        },
        CsvReadOptions {
            nrows: Some(1),
            ..CsvReadOptions::default()
        },
        CsvReadOptions {
            comment: Some(b'#'),
            ..CsvReadOptions::default()
        },
        CsvReadOptions {
            thousands: Some(b','),
            ..CsvReadOptions::default()
        },
    ];
    for variant in &behavioral_variants {
        assert!(!super::csv_read_options_match_default_fast_path(variant));
    }
}
14502
/// Checks that options with only `na_filter: false` qualify for the no-NA
/// numeric fast path and that a numeric CSV read with them yields `Int64` and
/// `Float64` columns with the expected values.
#[test]
fn read_csv_no_na_filter_numeric_fast_path_preserves_numeric_columns() {
    let no_na = CsvReadOptions {
        na_filter: false,
        ..CsvReadOptions::default()
    };
    assert!(super::csv_read_options_match_no_na_numeric_fast_path(
        &no_na
    ));

    let parsed = read_csv_with_options("i,f\n1,2.5\n3,4\n", &no_na).expect("parse");
    assert_eq!(parsed.len(), 2);
    assert_eq!(parsed.column("i").expect("i").dtype(), DType::Int64);
    assert_eq!(parsed.column("f").expect("f").dtype(), DType::Float64);

    let second_int = &parsed.column("i").expect("i").values()[1];
    assert_eq!(*second_int, Scalar::Int64(3));
    let first_float = &parsed.column("f").expect("f").values()[0];
    assert_eq!(*first_float, Scalar::Float64(2.5));
}
14523
/// Checks that with `na_filter: false` a column containing an empty field is
/// read as `Utf8`, with the empty field kept as an empty string and the numeric
/// field kept as text.
#[test]
fn read_csv_no_na_filter_fast_path_falls_back_for_empty_field() {
    let no_na = CsvReadOptions {
        na_filter: false,
        ..CsvReadOptions::default()
    };
    let parsed = read_csv_with_options("x,y\n1,\n2,3\n", &no_na).expect("parse");
    let y_column = parsed.column("y").expect("y");

    assert_eq!(y_column.dtype(), DType::Utf8);
    let y_values = y_column.values();
    assert_eq!(y_values[0], Scalar::Utf8(String::new()));
    assert_eq!(y_values[1], Scalar::Utf8("3".to_owned()));
}
14537
/// Checks that a non-default comment character (`%`) causes the leading
/// `%`-prefixed line to be skipped.
#[test]
fn test_csv_comment_custom_char() {
    let percent_comments = CsvReadOptions {
        comment: Some(b'%'),
        ..CsvReadOptions::default()
    };
    let parsed = read_csv_with_options(
        "% this is ignored\nname,age\nalice,30\n",
        &percent_comments,
    )
    .expect("parse");

    assert_eq!(parsed.index().len(), 1);
    let first_name = &parsed.column("name").unwrap().values()[0];
    assert_eq!(*first_name, Scalar::Utf8("alice".to_string()));
}
14552
/// Checks that with `thousands: Some(b',')` a quoted `1,234,567` is parsed as
/// the integer 1234567 and a value without separators is unaffected.
#[test]
fn test_csv_thousands_strips_int_separator() {
    let comma_thousands = CsvReadOptions {
        thousands: Some(b','),
        ..CsvReadOptions::default()
    };
    let parsed = read_csv_with_options("amount\n\"1,234,567\"\n\"42\"\n", &comma_thousands)
        .expect("parse");

    for (row, expected) in [(0, 1234567), (1, 42)] {
        assert_eq!(
            parsed.column("amount").unwrap().values()[row],
            Scalar::Int64(expected)
        );
    }
}
14570
/// Checks that with `.` as the thousands separator and `,` as the decimal
/// mark, a quoted `1.234,56` is parsed as the float 1234.56.
#[test]
fn test_csv_thousands_strips_float_with_custom_decimal() {
    let european = CsvReadOptions {
        thousands: Some(b'.'),
        decimal: b',',
        ..CsvReadOptions::default()
    };
    let parsed = read_csv_with_options("price\n\"1.234,56\"\n", &european).expect("parse");

    let cell = parsed.column("price").unwrap().values()[0].clone();
    let Scalar::Float64(price) = cell else {
        panic!("expected Float64");
    };
    assert!((price - 1234.56).abs() < 1e-9);
}
14586
/// Checks that with default options (no thousands separator) a quoted `1,234`
/// stays a string.
#[test]
fn test_csv_thousands_none_keeps_separator_as_string() {
    let defaults = CsvReadOptions::default();
    let parsed = read_csv_with_options("amount\n\"1,234\"\n", &defaults).expect("parse");

    let cell = &parsed.column("amount").unwrap().values()[0];
    assert_eq!(*cell, Scalar::Utf8("1,234".to_string()));
}
14597
/// Checks that when the thousands separator equals the decimal mark (`.`),
/// `1.234` is still parsed as the float 1.234.
#[test]
fn test_csv_thousands_equal_to_decimal_is_ignored() {
    let conflicting = CsvReadOptions {
        thousands: Some(b'.'),
        decimal: b'.',
        ..CsvReadOptions::default()
    };
    let parsed = read_csv_with_options("v\n\"1.234\"\n", &conflicting).expect("parse");

    let cell = parsed.column("v").unwrap().values()[0].clone();
    let Scalar::Float64(value) = cell else {
        panic!("expected Float64");
    };
    assert!((value - 1.234).abs() < 1e-9);
}
14614
/// Checks that a thousands separator of `,` leaves a non-numeric quoted value
/// (`a,b`) untouched.
#[test]
fn test_csv_thousands_does_not_affect_non_numeric() {
    let comma_thousands = CsvReadOptions {
        thousands: Some(b','),
        ..CsvReadOptions::default()
    };
    let parsed = read_csv_with_options("name\n\"a,b\"\n", &comma_thousands).expect("parse");

    let cell = &parsed.column("name").unwrap().values()[0];
    assert_eq!(*cell, Scalar::Utf8("a,b".to_string()));
}
14628
/// Checks that with `quotechar: b'\''` single-quoted fields are unquoted and a
/// delimiter inside the quotes is kept as part of the value.
#[test]
fn test_csv_quotechar_custom_single_quote() {
    let single_quoted = CsvReadOptions {
        quotechar: b'\'',
        ..CsvReadOptions::default()
    };
    let parsed = read_csv_with_options("name,remark\n'alice','loves, cats'\n", &single_quoted)
        .expect("parse");

    for (column, expected) in [("name", "alice"), ("remark", "loves, cats")] {
        assert_eq!(
            parsed.column(column).unwrap().values()[0],
            Scalar::Utf8(expected.to_string())
        );
    }
}
14646
/// Checks that with default options a doubled quote inside a quoted field is
/// read as a single quote character.
#[test]
fn test_csv_doublequote_true_collapses_doubled_quotes() {
    let defaults = CsvReadOptions::default();
    let parsed =
        read_csv_with_options("text\n\"she said \"\"hi\"\"\"\n", &defaults).expect("parse");

    let cell = &parsed.column("text").unwrap().values()[0];
    assert_eq!(*cell, Scalar::Utf8("she said \"hi\"".to_string()));
}
14657
/// Checks that with `doublequote: false` and a backslash escape character, an
/// escaped quote inside a quoted field is read as a literal quote.
#[test]
fn test_csv_doublequote_false_requires_escapechar() {
    let backslash_escapes = CsvReadOptions {
        doublequote: false,
        escapechar: Some(b'\\'),
        ..CsvReadOptions::default()
    };
    let parsed =
        read_csv_with_options("text\n\"hi\\\"there\"\n", &backslash_escapes).expect("parse");

    let cell = &parsed.column("text").unwrap().values()[0];
    assert_eq!(*cell, Scalar::Utf8("hi\"there".to_string()));
}
14673
/// A custom single-byte `lineterminator` splits records.
///
/// Note: despite the test name, the terminator exercised here is `|`.
#[test]
fn test_csv_lineterminator_semicolon() {
    let opts = CsvReadOptions {
        lineterminator: Some(b'|'),
        ..Default::default()
    };
    let parsed = read_csv_with_options("a,b|1,x|2,y|3,z", &opts).expect("parse");

    assert_eq!(parsed.index().len(), 3);
    assert_eq!(parsed.column_names(), vec!["a", "b"]);
    let a = parsed.column("a").unwrap();
    assert_eq!(a.values()[2], Scalar::Int64(3));
}
14687
/// With default options, CRLF-terminated input yields one row per record.
#[test]
fn test_csv_lineterminator_default_none_accepts_crlf() {
    let defaults = CsvReadOptions::default();
    let parsed = read_csv_with_options("a,b\r\n1,x\r\n2,y\r\n", &defaults).expect("parse");
    assert_eq!(parsed.index().len(), 2);
}
14694
/// `skipfooter` counts records as delimited by the custom `lineterminator`:
/// the trailing `FOOTER` record is dropped and the four data rows remain.
#[test]
fn test_csv_lineterminator_interacts_with_skipfooter() {
    let opts = CsvReadOptions {
        lineterminator: Some(b'|'),
        skipfooter: 1,
        ..Default::default()
    };
    let parsed = read_csv_with_options("a|1|2|3|4|FOOTER", &opts).expect("parse");
    assert_eq!(parsed.index().len(), 4);
}
14707
/// `skipfooter: 1` removes the final summary row, leaving the three data rows.
#[test]
fn test_csv_skipfooter_drops_trailing_rows() {
    let opts = CsvReadOptions {
        skipfooter: 1,
        ..Default::default()
    };
    let parsed =
        read_csv_with_options("a,b\n1,x\n2,y\n3,z\nTOTAL,summary\n", &opts).expect("parse");

    assert_eq!(parsed.index().len(), 3);
    let a = parsed.column("a").unwrap();
    assert_eq!(a.values()[2], Scalar::Int64(3));
}
14719
/// An explicit `skipfooter: 0` produces the same row count as the default options.
#[test]
fn test_csv_skipfooter_zero_is_noop() {
    let input = "a,b\n1,x\n2,y\n";

    let baseline = read_csv_with_options(input, &CsvReadOptions::default()).expect("parse");

    let explicit_zero = CsvReadOptions {
        skipfooter: 0,
        ..Default::default()
    };
    let with_zero = read_csv_with_options(input, &explicit_zero).expect("parse");

    assert_eq!(baseline.index().len(), with_zero.index().len());
}
14732
/// A `skipfooter` larger than the number of data rows leaves an empty frame
/// that still has both header columns.
#[test]
fn test_csv_skipfooter_larger_than_data_clears_rows() {
    let opts = CsvReadOptions {
        skipfooter: 10,
        ..Default::default()
    };
    let parsed = read_csv_with_options("a,b\n1,x\n2,y\n", &opts).expect("parse");

    assert_eq!(parsed.index().len(), 0);
    assert_eq!(parsed.column_names().len(), 2);
}
14745
/// Combining `nrows: 4` with `skipfooter: 1` on five data rows yields three rows.
#[test]
fn test_csv_skipfooter_with_nrows() {
    let opts = CsvReadOptions {
        nrows: Some(4),
        skipfooter: 1,
        ..Default::default()
    };
    let parsed = read_csv_with_options("a\n1\n2\n3\n4\n5\n", &opts).expect("parse");
    assert_eq!(parsed.index().len(), 3);
}
14758
/// Under the default options a backslash inside a quoted field is ordinary data.
#[test]
fn test_csv_escapechar_none_default_keeps_backslash_literal() {
    let defaults = CsvReadOptions::default();
    let parsed = read_csv_with_options("text\n\"foo\\bar\"\n", &defaults).expect("parse");
    let cell = &parsed.column("text").unwrap().values()[0];
    assert_eq!(*cell, Scalar::Utf8(String::from("foo\\bar")));
}
14769
/// Parses a generated one-column CSV with 500 integer rows and checks the shape
/// and the last value.
#[test]
fn test_csv_single_column() {
    // Header line followed by the integers 0..500, one per line.
    let body: String = (0..500).map(|n| format!("{n}\n")).collect();
    let csv = format!("value\n{body}");

    let parsed = read_csv_str(&csv).expect("parse");
    assert_eq!(parsed.index().len(), 500);
    assert_eq!(parsed.columns().len(), 1);

    let value = parsed.column("value").unwrap();
    assert_eq!(value.values()[499], Scalar::Int64(499));
    eprintln!("[TEST] test_csv_single_column | rows=500 cols=1 parse_ok=true | PASS");
}
14786
/// Parses a generated CSV with 120 columns (`c000`..`c119`) and three rows whose
/// cells hold `row * 1000 + column`, then spot-checks the first and last cells.
#[test]
fn test_csv_many_columns() {
    let col_count: usize = 120;

    let mut csv = (0..col_count)
        .map(|i| format!("c{i:03}"))
        .collect::<Vec<String>>()
        .join(",");
    csv.push('\n');
    for row in 0..3_usize {
        let line = (0..col_count)
            .map(|c| (row * 1000 + c).to_string())
            .collect::<Vec<String>>()
            .join(",");
        csv.push_str(&line);
        csv.push('\n');
    }

    let parsed = read_csv_str(&csv).expect("parse");
    assert_eq!(parsed.columns().len(), col_count);
    assert_eq!(parsed.index().len(), 3);

    let first = parsed.column("c000").unwrap();
    assert_eq!(first.values()[0], Scalar::Int64(0));
    let last = parsed.column("c119").unwrap();
    assert_eq!(last.values()[2], Scalar::Int64(2119));
    eprintln!("[TEST] test_csv_many_columns | rows=3 cols={col_count} parse_ok=true | PASS");
}
14813
/// Checks per-column type inference on a frame with integer, float, string,
/// boolean and entirely-empty columns.
#[test]
fn test_csv_mixed_dtypes() {
    let input = "ints,floats,strings,bools,nulls\n1,1.5,hello,true,\n2,2.7,world,false,\n3,3.14,foo,true,\n";
    let parsed = read_csv_str(input).expect("parse");

    let int_values = parsed.column("ints").unwrap().values();
    assert_eq!(int_values[0], Scalar::Int64(1));

    let float_values = parsed.column("floats").unwrap().values();
    assert_eq!(float_values[1], Scalar::Float64(2.7));

    let string_values = parsed.column("strings").unwrap().values();
    assert_eq!(string_values[2], Scalar::Utf8(String::from("foo")));

    let bool_values = parsed.column("bools").unwrap().values();
    assert_eq!(bool_values[0], Scalar::Bool(true));
    assert_eq!(bool_values[1], Scalar::Bool(false));

    // Every cell of the trailing empty column must be reported as missing.
    let null_col = parsed.column("nulls").unwrap();
    for cell in null_col.values() {
        assert!(cell.is_missing(), "null column values should be missing");
    }
    eprintln!(
        "[TEST] test_csv_mixed_dtypes | rows=3 cols=5 parse_ok=true | dtype_per_col=[int64,float64,utf8,bool,null] | PASS"
    );
}
14845
/// Non-ASCII header names are usable as column keys.
#[test]
fn test_csv_unicode_headers() {
    let parsed =
        read_csv_str("名前,Größe,café\nAlice,170,latte\nBob,180,espresso\n").expect("parse");

    assert!(parsed.column("名前").is_some());
    assert!(parsed.column("Größe").is_some());
    assert!(parsed.column("café").is_some());

    let names = parsed.column("名前").unwrap();
    assert_eq!(names.values()[0], Scalar::Utf8(String::from("Alice")));
    eprintln!("[TEST] test_csv_unicode_headers | rows=2 cols=3 parse_ok=true | PASS");
}
14860
/// Quoted fields may contain the delimiter and embedded newlines without
/// splitting the record.
#[test]
fn test_csv_quoted_fields() {
    let parsed = read_csv_str(
        "name,address\n\"Smith, John\",\"123 Main St\nApt 4\"\nJane,\"456 Oak, Suite 1\"\n",
    )
    .expect("parse");
    assert_eq!(parsed.index().len(), 2);

    let names = parsed.column("name").unwrap();
    assert_eq!(names.values()[0], Scalar::Utf8(String::from("Smith, John")));

    // The first address spans two physical lines inside its quotes.
    let addr0 = &parsed.column("address").unwrap().values()[0];
    assert!(
        matches!(addr0, Scalar::Utf8(s) if s.contains('\n')),
        "expected Utf8 containing embedded newline, got {addr0:?}"
    );
    eprintln!("[TEST] test_csv_quoted_fields | rows=2 cols=2 parse_ok=true | PASS");
}
14880
/// Input with and without a final newline must parse to the same shape and values.
#[test]
fn test_csv_trailing_newline() {
    let terminated = read_csv_str("a,b\n1,2\n3,4\n").expect("with newline");
    let unterminated = read_csv_str("a,b\n1,2\n3,4").expect("without newline");

    assert_eq!(terminated.index().len(), unterminated.index().len());
    assert_eq!(terminated.columns().len(), unterminated.columns().len());
    for key in terminated.columns().keys() {
        let lhs = terminated.column(key).unwrap();
        let rhs = unterminated.column(key).unwrap();
        assert_eq!(lhs.values(), rhs.values(), "column {key} mismatch");
    }
    eprintln!("[TEST] test_csv_trailing_newline | rows=2 cols=2 parse_ok=true | PASS");
}
14898
/// Read, write, then re-read: every column must be semantically equal to the
/// first parse.
#[test]
fn test_csv_round_trip_unchanged() {
    let original = read_csv_str("id,name,score\n1,Alice,95.5\n2,Bob,87\n3,,100\n").expect("read");
    let written = write_csv_string(&original).expect("write");
    let reparsed = read_csv_str(&written).expect("re-read");

    assert_eq!(original.index().len(), reparsed.index().len());
    for key in original.columns().keys() {
        let before = original.column(key).unwrap();
        let after = reparsed.column(key).unwrap();
        assert!(
            before.semantic_eq(after),
            "column {key} not semantically equal after round-trip"
        );
    }
    eprintln!("[TEST] test_csv_round_trip_unchanged | rows=3 cols=3 parse_ok=true | PASS");
}
14918
/// The writer uses the configured `delimiter` for the header and for data rows.
#[test]
fn test_write_csv_options_custom_delimiter() {
    let frame = read_csv_str("a,b\n1,x\n2,y\n").expect("read");
    let write_opts = CsvWriteOptions {
        delimiter: b';',
        ..Default::default()
    };
    let written = write_csv_string_with_options(&frame, &write_opts).expect("write");

    assert!(written.starts_with("a;b\n"));
    assert!(written.contains("1;x\n"));
    assert!(written.contains("2;y\n"));
}
14935
/// A missing cell is written as the configured `na_rep` text instead of an
/// empty field.
#[test]
fn test_write_csv_options_na_rep_replaces_nulls() {
    let frame = read_csv_str("id,name\n1,Alice\n2,\n").expect("read");
    let write_opts = CsvWriteOptions {
        na_rep: String::from("NA"),
        ..Default::default()
    };
    let written = write_csv_string_with_options(&frame, &write_opts).expect("write");

    assert!(written.contains("2,NA\n"));
    assert!(!written.contains("2,\n"));
}
14952
/// With `header: false` only the data rows are written.
#[test]
fn test_write_csv_options_header_false_omits_header_row() {
    let frame = read_csv_str("a,b\n1,2\n").expect("read");
    let write_opts = CsvWriteOptions {
        header: false,
        ..Default::default()
    };
    let written = write_csv_string_with_options(&frame, &write_opts).expect("write");
    assert_eq!(written, "1,2\n");
}
14967
/// `include_index` adds a leading index column whose header is `index_label`.
#[test]
fn test_write_csv_options_include_index_and_index_label() {
    let frame = read_csv_str("a,b\n1,2\n3,4\n").expect("read");
    let write_opts = CsvWriteOptions {
        include_index: true,
        index_label: Some(String::from("row_id")),
        ..Default::default()
    };
    let written = write_csv_string_with_options(&frame, &write_opts).expect("write");

    assert_eq!(written, "row_id,a,b\n0,1,2\n1,3,4\n");
}
14984
/// When no `index_label` is given, the index column header falls back to the
/// index's own name.
#[test]
fn test_write_csv_options_include_index_uses_named_index_when_label_omitted() {
    let data = std::collections::BTreeMap::from([(
        "a".to_string(),
        Column::from_values(vec![Scalar::Int64(10), Scalar::Int64(20)]).unwrap(),
    )]);
    let named_index = Index::from_i64(vec![100, 200]).set_name("sample_id");
    let frame =
        DataFrame::new_with_column_order(named_index, data, vec!["a".to_string()]).unwrap();

    let write_opts = CsvWriteOptions {
        include_index: true,
        ..Default::default()
    };
    let written = write_csv_string_with_options(&frame, &write_opts).expect("write");

    assert_eq!(written, "sample_id,a\n100,10\n200,20\n");
}
15010
/// An explicit `index_label` takes precedence over the index's own name in the
/// written header.
#[test]
fn test_write_csv_options_include_index_label_overrides_index_name() {
    let data = std::collections::BTreeMap::from([(
        "a".to_string(),
        Column::from_values(vec![Scalar::Int64(10), Scalar::Int64(20)]).unwrap(),
    )]);
    let named_index = Index::from_i64(vec![100, 200]).set_name("sample_id");
    let frame =
        DataFrame::new_with_column_order(named_index, data, vec!["a".to_string()]).unwrap();

    let write_opts = CsvWriteOptions {
        include_index: true,
        index_label: Some(String::from("row")),
        ..Default::default()
    };
    let written = write_csv_string_with_options(&frame, &write_opts).expect("write");

    assert_eq!(written, "row,a\n100,10\n200,20\n");
}
15037
/// A frame with a row multi-index, written with its index and read back with
/// the three level names as index columns, must equal the original.
#[test]
fn test_csv_multiindex_roundtrip_with_explicit_index_cols() {
    let original = make_row_multiindex_test_dataframe();

    let write_opts = CsvWriteOptions {
        include_index: true,
        ..Default::default()
    };
    let written = write_csv_string_with_options(&original, &write_opts).expect("write");

    let level_names = ["region", "product", "year"];
    let restored = read_csv_with_index_cols(&written, &CsvReadOptions::default(), &level_names)
        .expect("read");

    assert!(restored.equals(&original));
    assert_eq!(restored.row_multiindex(), original.row_multiindex());
}
15060
/// Writing with default `CsvWriteOptions` must produce the same text as
/// `write_csv_string`.
#[test]
fn test_write_csv_options_default_matches_write_csv_string() {
    let frame = read_csv_str("a,b\n1,2\n3,4\n").expect("read");

    let plain = write_csv_string(&frame).expect("write");
    let via_options =
        write_csv_string_with_options(&frame, &CsvWriteOptions::default()).expect("write");

    assert_eq!(plain, via_options);
}
15070
/// Writes a float column containing NaN with `na_rep` set to `"NaN"` and checks
/// that the text `NaN` appears in the output.
#[test]
fn test_write_csv_options_na_rep_with_float_nan() {
    use fp_columnar::Column;

    let scores = Column::from_values(vec![Scalar::Float64(1.5), Scalar::Float64(f64::NAN)]).unwrap();
    let data = std::collections::BTreeMap::from([("score".to_string(), scores)]);
    let frame = DataFrame::new_with_column_order(
        Index::from_i64(vec![0, 1]),
        data,
        vec!["score".to_string()],
    )
    .unwrap();

    let write_opts = CsvWriteOptions {
        na_rep: String::from("NaN"),
        ..Default::default()
    };
    let written = write_csv_string_with_options(&frame, &write_opts).expect("write");
    assert!(written.contains("NaN"));
}
15096
/// Parses a generated 100,000 x 10 integer CSV where each cell holds
/// `row * col_count + column`, then checks the shape and the first and last cells.
#[test]
fn test_csv_large_file_perf() {
    let col_count: usize = 10;
    let row_count: usize = 100_000;

    let mut csv = String::with_capacity(row_count * 50);
    let header = (0..col_count)
        .map(|i| format!("col{i}"))
        .collect::<Vec<String>>()
        .join(",");
    csv.push_str(&header);
    csv.push('\n');
    for r in 0..row_count {
        let line = (0..col_count)
            .map(|c| (r * col_count + c).to_string())
            .collect::<Vec<String>>()
            .join(",");
        csv.push_str(&line);
        csv.push('\n');
    }

    let parsed = read_csv_str(&csv).expect("parse 100K rows");
    assert_eq!(parsed.index().len(), row_count);
    assert_eq!(parsed.columns().len(), col_count);

    let first_col = parsed.column("col0").unwrap();
    assert_eq!(first_col.values()[0], Scalar::Int64(0));

    let last_row = row_count - 1;
    let last_col = parsed.column("col9").unwrap();
    assert_eq!(
        last_col.values()[last_row],
        Scalar::Int64((last_row * col_count + 9) as i64)
    );
    eprintln!(
        "[TEST] test_csv_large_file_perf | rows={row_count} cols={col_count} parse_ok=true | PASS"
    );
}
15129
/// Compares the written CSV against a fixed golden string.
///
/// The golden text differs from the input in one cell: the `true` in column `c`
/// is expected back as `1.0`.
#[test]
fn test_csv_golden_output() {
    let parsed = read_csv_str("a,b,c\n1,hello,3.14\n2,,true\n3,world,\n").expect("parse");
    let output = write_csv_string(&parsed).expect("write");

    let expected = "a,b,c\n1,hello,3.14\n2,,1.0\n3,world,\n";
    assert_eq!(
        output, expected,
        "output does not match golden reference.\nGot:\n{output}\nExpected:\n{expected}"
    );
    eprintln!("[TEST] test_csv_golden_output | golden_match=true | PASS");
}
15147
15148 use super::{CsvOnBadLines, CsvReadOptions, read_csv_with_options};
15151
/// Tab-separated input parses when `delimiter` is set to a tab.
#[test]
fn csv_with_custom_delimiter() {
    let tsv_opts = CsvReadOptions {
        delimiter: b'\t',
        ..CsvReadOptions::default()
    };
    let parsed =
        read_csv_with_options("a\tb\tc\n1\t2\t3\n4\t5\t6\n", &tsv_opts).expect("parse tsv");

    assert_eq!(parsed.index().len(), 2);
    let a = parsed.column("a").unwrap();
    assert_eq!(a.values()[0], Scalar::Int64(1));
}
15163
/// With `has_headers: false` the columns are named `column_0`, `column_1`, and
/// the first input line is kept as data.
#[test]
fn csv_without_headers_generates_default_names_and_keeps_first_row() {
    let headerless = CsvReadOptions {
        has_headers: false,
        ..CsvReadOptions::default()
    };
    let parsed = read_csv_with_options("1,2\n3,4\n", &headerless).expect("parse");
    assert_eq!(parsed.index().len(), 2);

    // (column name, row position, expected integer), in the original check order.
    let expectations = [
        ("column_0", 0, 1),
        ("column_1", 0, 2),
        ("column_0", 1, 3),
        ("column_1", 1, 4),
    ];
    for (column, row, expected) in expectations {
        assert_eq!(
            parsed.column(column).unwrap().values()[row],
            Scalar::Int64(expected)
        );
    }
}
15190
/// Requesting a `usecols` name that is absent from the header fails with
/// `IoError::MissingUsecols` listing that name.
#[test]
fn csv_usecols_missing_column_errors() {
    let opts = CsvReadOptions {
        usecols: Some(vec![String::from("c")]),
        ..CsvReadOptions::default()
    };
    let err = read_csv_with_options("a,b\n1,2\n", &opts).expect_err("missing usecols");

    let expected_missing = vec![String::from("c")];
    assert!(matches!(err, IoError::MissingUsecols(names) if names == expected_missing));
}
15203
/// A generated column name (`column_0`) can be used as `index_col` on
/// headerless input; that column moves to the index and leaves the data columns.
#[test]
fn csv_without_headers_supports_generated_index_col_name() {
    let opts = CsvReadOptions {
        has_headers: false,
        index_col: Some("column_0".into()),
        ..CsvReadOptions::default()
    };
    let parsed = read_csv_with_options("10,alpha\n20,beta\n", &opts).expect("parse");

    assert_eq!(parsed.index().len(), 2);
    assert_eq!(parsed.index().labels()[0], IndexLabel::Int64(10));
    assert_eq!(parsed.index().labels()[1], IndexLabel::Int64(20));
    assert!(parsed.column("column_0").is_none());

    let remaining = parsed.column("column_1").unwrap();
    assert_eq!(remaining.values()[0], Scalar::Utf8("alpha".to_owned()));
    assert_eq!(remaining.values()[1], Scalar::Utf8("beta".to_owned()));
}
15226
/// Cells matching a configured `na_values` token are read as missing; other
/// text is kept as a string.
#[test]
fn csv_with_na_values() {
    let opts = CsvReadOptions {
        na_values: vec!["NA".into(), "n/a".into()],
        ..CsvReadOptions::default()
    };
    let parsed = read_csv_with_options("a,b\n1,NA\n2,n/a\n3,valid\n", &opts).expect("parse");

    let b_values = parsed.column("b").unwrap().values();
    assert!(b_values[0].is_missing());
    assert!(b_values[1].is_missing());
    assert_eq!(b_values[2], Scalar::Utf8("valid".to_owned()));
}
15240
/// The literal text `None` is read as missing under the default options.
#[test]
fn csv_none_is_default_na() {
    let parsed = read_csv_str("a,b\n1,None\n2,valid\n").expect("parse");
    let b_values = parsed.column("b").unwrap().values();

    assert!(b_values[0].is_missing(), "None should be parsed as NA");
    assert_eq!(b_values[1], Scalar::Utf8("valid".to_owned()));
}
15250
/// Table of single-cell inputs and the scalar each is expected to parse to.
///
/// Each input is parsed as the only data cell of a one-column CSV named `x`.
#[test]
fn csv_scalar_inference_matches_pandas_2_2_3() {
    let parse_cell = |raw: &str| {
        let parsed = read_csv_str(&format!("x\n{raw}\n")).expect("parse");
        parsed.column("x").unwrap().values()[0].clone()
    };

    let cases = [
        // Signed and zero-padded integers.
        ("+1", Scalar::Int64(1)),
        ("01", Scalar::Int64(1)),
        ("-5", Scalar::Int64(-5)),
        // Exponent notation and infinities become floats.
        ("1e3", Scalar::Float64(1000.0)),
        ("inf", Scalar::Float64(f64::INFINITY)),
        ("-inf", Scalar::Float64(f64::NEG_INFINITY)),
        // Booleans in several casings.
        ("TRUE", Scalar::Bool(true)),
        ("true", Scalar::Bool(true)),
        ("False", Scalar::Bool(false)),
        // Numbers surrounded by spaces still parse as numbers.
        (" 1 ", Scalar::Int64(1)),
        (" 3.5 ", Scalar::Float64(3.5)),
        // Text, including padded booleans and a padded NA token, stays verbatim.
        ("hello", Scalar::Utf8("hello".to_owned())),
        (" abc ", Scalar::Utf8(" abc ".to_owned())),
        ("true ", Scalar::Utf8("true ".to_owned())),
        (" True ", Scalar::Utf8(" True ".to_owned())),
        (" NA ", Scalar::Utf8(" NA ".to_owned())),
    ];
    for (raw, expected) in cases {
        assert_eq!(parse_cell(raw), expected);
    }

    assert!(!matches!(parse_cell(" NA "), Scalar::Null(_)));
}
15285
/// Positive and negative infinity are written as JSON `null` in records orient.
#[test]
fn json_write_non_finite_floats_as_null_like_pandas() {
    let parsed = read_csv_str("x\n1.5\ninf\n-inf\n").expect("parse");
    let rendered = write_json_string(&parsed, JsonOrient::Records).expect("json");
    assert_eq!(rendered, r#"[{"x":1.5},{"x":null},{"x":null}]"#);
}
15296
/// `is_pandas_default_na` must accept every token in the default table and
/// reject near-misses that differ in case, padding, or meaning.
#[test]
fn csv_default_na_token_set_matches_pandas_table() {
    let accepted = [
        "", "#N/A", "#N/A N/A", "#NA", "-1.#IND", "-1.#QNAN", "-NaN", "-nan", "1.#IND",
        "1.#QNAN", "<NA>", "N/A", "NA", "NULL", "NaN", "None", "n/a", "nan", "null",
    ];
    for candidate in accepted {
        assert!(super::is_pandas_default_na(candidate), "{candidate:?}");
    }

    let rejected = ["none", "NAN", "n/a ", " NULL", "0", "false"];
    for candidate in rejected {
        assert!(!super::is_pandas_default_na(candidate), "{candidate:?}");
    }
}
15311
/// With `keep_default_na: false` only the user-supplied `na_values` count as
/// missing; the default token `NA` is kept as text.
#[test]
fn csv_keep_default_na_false() {
    let opts = CsvReadOptions {
        na_values: vec!["CUSTOM".into()],
        keep_default_na: false,
        ..CsvReadOptions::default()
    };
    let parsed = read_csv_with_options("a,b\n1,NA\n2,CUSTOM\n3,valid\n", &opts).expect("parse");

    let b_values = parsed.column("b").unwrap().values();
    assert_eq!(b_values[0], Scalar::Utf8("NA".to_owned()));
    assert!(b_values[1].is_missing());
    assert_eq!(b_values[2], Scalar::Utf8("valid".to_owned()));
}
15329
/// With `na_filter: false` nothing is treated as missing: `NA`, the empty cell
/// and `None` all come back as strings.
#[test]
fn csv_na_filter_false() {
    let opts = CsvReadOptions {
        na_filter: false,
        ..CsvReadOptions::default()
    };
    let parsed = read_csv_with_options("a,b\n1,NA\n2,\n3,None\n", &opts).expect("parse");

    let b_values = parsed.column("b").unwrap().values();
    assert_eq!(b_values[0], Scalar::Utf8("NA".to_owned()));
    assert_eq!(b_values[1], Scalar::Utf8("".to_owned()));
    assert_eq!(b_values[2], Scalar::Utf8("None".to_owned()));
}
15345
/// A named `index_col` becomes the frame index and is removed from the data
/// columns.
#[test]
fn csv_with_index_col() {
    let opts = CsvReadOptions {
        index_col: Some("id".into()),
        ..CsvReadOptions::default()
    };
    let parsed = read_csv_with_options("id,val\na,10\nb,20\nc,30\n", &opts).expect("parse");

    assert_eq!(parsed.index().len(), 3);
    assert_eq!(parsed.index().labels()[0], IndexLabel::Utf8("a".into()));
    assert!(parsed.column("id").is_none());

    let val = parsed.column("val").unwrap();
    assert_eq!(val.values()[0], Scalar::Int64(10));
}
15362
/// An `index_col` that is not in the header fails with
/// `IoError::MissingIndexColumn` carrying the requested name.
#[test]
fn csv_with_missing_index_col_errors() {
    let opts = CsvReadOptions {
        index_col: Some("missing".into()),
        ..CsvReadOptions::default()
    };

    let err = read_csv_with_options("id,val\na,10\nb,20\n", &opts)
        .expect_err("missing index_col should error");
    assert!(
        matches!(&err, IoError::MissingIndexColumn(name) if name == "missing"),
        "expected MissingIndexColumn(\"missing\"), got {err:?}"
    );
}
15377
/// Under default options a row with fewer fields than the header is reported as
/// an `IoError::Csv` error.
#[test]
fn csv_with_malformed_row_errors() {
    let defaults = CsvReadOptions::default();

    let err = read_csv_with_options("a,b\n1,2\n3\n", &defaults)
        .expect_err("malformed CSV row should error");
    assert!(
        matches!(&err, IoError::Csv(_)),
        "expected CSV parser error for ragged row, got {err:?}"
    );
}
15389
/// With `CsvOnBadLines::Skip`, a row with too many fields is dropped and the
/// surrounding rows are kept.
#[test]
fn csv_on_bad_lines_skip_skips_extra_field_rows() {
    let opts = CsvReadOptions {
        on_bad_lines: CsvOnBadLines::Skip,
        ..CsvReadOptions::default()
    };

    let parsed = read_csv_with_options("a,b\n1,2\n3,4,5\n6,7\n", &opts)
        .expect("parse with skipped bad line");
    assert_eq!(parsed.index().len(), 2);

    // (column name, row position, expected integer), in the original check order.
    for (column, row, expected) in [("a", 0, 1), ("b", 0, 2), ("a", 1, 6), ("b", 1, 7)] {
        assert_eq!(
            parsed.column(column).unwrap().values()[row],
            Scalar::Int64(expected)
        );
    }
}
15405
/// With `CsvOnBadLines::Warn`, a row with too many fields is likewise left out
/// of the resulting frame.
#[test]
fn csv_on_bad_lines_warn_skips_extra_field_rows() {
    let opts = CsvReadOptions {
        on_bad_lines: CsvOnBadLines::Warn,
        ..CsvReadOptions::default()
    };

    let parsed = read_csv_with_options("a,b\n1,2\n3,4,5\n6,7\n", &opts)
        .expect("parse with warned bad line");
    assert_eq!(parsed.index().len(), 2);

    let a = parsed.column("a").unwrap();
    assert_eq!(a.values()[0], Scalar::Int64(1));
    let b = parsed.column("b").unwrap();
    assert_eq!(b.values()[1], Scalar::Int64(7));
}
15419
/// With `CsvOnBadLines::Skip`, a row with too few fields is kept and its absent
/// trailing cell is filled with a null.
#[test]
fn csv_on_bad_lines_skip_preserves_short_rows_as_missing() {
    let opts = CsvReadOptions {
        on_bad_lines: CsvOnBadLines::Skip,
        ..CsvReadOptions::default()
    };

    let parsed = read_csv_with_options("a,b\n1,2\n3\n6,7\n", &opts).expect("parse short row");
    assert_eq!(parsed.index().len(), 3);

    let a = parsed.column("a").unwrap();
    assert_eq!(a.values()[1], Scalar::Int64(3));
    let b = parsed.column("b").unwrap();
    assert_eq!(b.values()[1], Scalar::Null(NullKind::Null));
}
15438
/// Datetime values, datetime index labels and timedelta values must be written
/// to JSON as integer milliseconds, not as ISO strings and not as raw nanoseconds.
///
/// The frame holds one datetime cell (`d`), one timedelta cell of one second
/// (`t`), and a datetime index label equal to the datetime cell.
#[test]
fn json_temporal_values_and_index_are_epoch_millis() {
    // 1_577_836_800 s after the epoch, in nanoseconds; 1_577_836_800_000 in millis.
    let dt_ns = 1_577_836_800_000_000_000_i64;
    let mut columns = BTreeMap::new();
    columns.insert(
        "d".to_owned(),
        Column::from_values(vec![Scalar::Datetime64(dt_ns)]).expect("d"),
    );
    columns.insert(
        "t".to_owned(),
        Column::from_values(vec![Scalar::Timedelta64(1_000_000_000)]).expect("t"),
    );
    let index = Index::new(vec![IndexLabel::Datetime64(dt_ns)]);
    let frame =
        DataFrame::new_with_column_order(index, columns, vec!["d".to_owned(), "t".to_owned()])
            .expect("frame");

    let out = write_json_string(&frame, JsonOrient::Columns).expect("json");
    assert!(
        out.contains("1577836800000"),
        "datetime value/index should be epoch-millis int, got {out}"
    );
    assert!(
        out.contains("1000"),
        "timedelta value should be epoch-millis int (1000), got {out}"
    );
    // `contains("1000")` alone is also satisfied by the raw nanosecond rendering
    // `1000000000`, so rule that out explicitly. The datetime millis value
    // `1577836800000` does not contain this digit run.
    assert!(
        !out.contains("1000000000"),
        "timedelta should not be emitted as raw nanoseconds: {out}"
    );
    assert!(
        !out.contains("2020-01-01"),
        "should not emit ISO string: {out}"
    );
    assert!(
        !out.contains("1577836800000000000"),
        "should not emit raw nanoseconds: {out}"
    );
}
15479
/// Records-orient JSON parses into the expected cells and survives a
/// write/re-read cycle with the same row count.
#[test]
fn json_records_read_write_roundtrip() {
    let source = r#"[{"name":"Alice","age":30},{"name":"Bob","age":25}]"#;
    let parsed = read_json_str(source, JsonOrient::Records).expect("read json records");
    assert_eq!(parsed.index().len(), 2);

    let names = parsed.column("name").unwrap();
    assert_eq!(names.values()[0], Scalar::Utf8("Alice".to_owned()));
    let ages = parsed.column("age").unwrap();
    assert_eq!(ages.values()[1], Scalar::Int64(25));

    let rendered = write_json_string(&parsed, JsonOrient::Records).expect("write");
    let reparsed = read_json_str(&rendered, JsonOrient::Records).expect("re-read");
    assert_eq!(reparsed.index().len(), 2);
}
15495
/// An integer column containing a JSON `null` must compare equal after a
/// records-orient write/re-read cycle.
#[test]
fn json_records_nullable_int_roundtrip_is_stable() {
    let source = r#"[{"city":"Boston","temp":72},{"city":"Paris","temp":null}]"#;
    let first = read_json_str(source, JsonOrient::Records).expect("read json records");
    let rendered = write_json_string(&first, JsonOrient::Records).expect("write records");
    let second = read_json_str(&rendered, JsonOrient::Records).expect("re-read records");

    assert!(first.equals(&second));
}
15505
/// Columns appear in first-seen order across records (`b`, `a`, then `c`),
/// not sorted by name.
#[test]
fn json_records_preserves_column_order() {
    let parsed = read_json_str(r#"[{"b":1,"a":2},{"c":3}]"#, JsonOrient::Records)
        .expect("read json records");

    let names = parsed.column_names();
    let observed: Vec<&str> = names.iter().map(|column| column.as_str()).collect();
    assert_eq!(observed, vec!["b", "a", "c"]);
}
15517
/// Columns-orient JSON uses the inner object keys as index labels, and those
/// labels survive a write/re-read cycle.
#[test]
fn json_columns_read_write_roundtrip() {
    let source = r#"{"name":{"row_a":"Alice","row_b":"Bob"},"age":{"row_a":30,"row_b":25}}"#;
    let parsed = read_json_str(source, JsonOrient::Columns).expect("read json columns");
    assert_eq!(parsed.index().len(), 2);
    assert_eq!(parsed.index().labels()[0], IndexLabel::Utf8("row_a".into()));

    let rendered = write_json_string(&parsed, JsonOrient::Columns).expect("write");
    let reparsed = read_json_str(&rendered, JsonOrient::Columns).expect("re-read");
    assert_eq!(reparsed.index().labels(), parsed.index().labels());
}
15529
/// Index labels `1` (integer) and `"1"` (string) are expected to be rejected
/// when writing columns-orient JSON, with a duplicate-index-label-key error.
#[test]
fn json_columns_write_duplicate_index_rejects() {
    let colliding_index = Index::new(vec![IndexLabel::Int64(1), IndexLabel::Utf8("1".into())]);
    let values = Column::from_values(vec![Scalar::Int64(10), Scalar::Int64(20)]).expect("col");
    let mut columns = BTreeMap::new();
    columns.insert("v".into(), values);
    let frame = DataFrame::new(colliding_index, columns).expect("frame");

    let err = write_json_string(&frame, JsonOrient::Columns)
        .expect_err("duplicate JSON object keys should reject");
    assert!(
        matches!(&err, IoError::JsonFormat(msg) if msg.contains("duplicate index label key")),
        "expected duplicate-index-key JsonFormat, got {err:?}"
    );
}
15547
/// Split-orient JSON parses its `columns`, `index` and `data` sections, and the
/// index labels survive a write/re-read cycle.
#[test]
fn json_split_read_write_roundtrip() {
    let source = r#"{"columns":["x","y"],"index":["r1","r2","r3"],"data":[[1,4],[2,5],[3,6]]}"#;
    let parsed = read_json_str(source, JsonOrient::Split).expect("read json split");
    assert_eq!(parsed.index().len(), 3);
    assert_eq!(parsed.index().labels()[0], IndexLabel::Utf8("r1".into()));

    let x = parsed.column("x").unwrap();
    assert_eq!(x.values()[0], Scalar::Int64(1));
    let y = parsed.column("y").unwrap();
    assert_eq!(y.values()[2], Scalar::Int64(6));

    let rendered = write_json_string(&parsed, JsonOrient::Split).expect("write");
    let reparsed = read_json_str(&rendered, JsonOrient::Split).expect("re-read");
    assert_eq!(reparsed.index().len(), 3);
    assert_eq!(reparsed.index().labels(), parsed.index().labels());
}
15565
/// A frame with a row multi-index round-trips through records-orient JSON: the
/// multi-index is restored and no `__index_level_0__` column is left behind.
#[test]
fn json_records_multiindex_roundtrip_restores_logical_row_axis() {
    let original = make_row_multiindex_test_dataframe();
    let rendered = write_json_string(&original, JsonOrient::Records).expect("write");
    let restored = read_json_str(&rendered, JsonOrient::Records).expect("read");

    assert!(restored.equals(&original));
    assert!(restored.row_multiindex().is_some());
    assert!(restored.column("__index_level_0__").is_none());
}
15576
/// A frame with a row multi-index round-trips through split-orient JSON: the
/// multi-index is restored and no `__index_level_0__` column is left behind.
#[test]
fn json_split_multiindex_roundtrip_restores_logical_row_axis() {
    let original = make_row_multiindex_test_dataframe();
    let rendered = write_json_string(&original, JsonOrient::Split).expect("write");
    let restored = read_json_str(&rendered, JsonOrient::Split).expect("read");

    assert!(restored.equals(&original));
    assert!(restored.row_multiindex().is_some());
    assert!(restored.column("__index_level_0__").is_none());
}
15587
/// Split-orient input without an `index` key gets integer labels 0, 1, ...
#[test]
fn json_split_without_index_defaults_to_range_index() {
    let parsed = read_json_str(r#"{"columns":["x"],"data":[[10],[20]]}"#, JsonOrient::Split)
        .expect("read json split");

    assert_eq!(parsed.index().labels()[0], IndexLabel::Int64(0));
    assert_eq!(parsed.index().labels()[1], IndexLabel::Int64(1));
}
15595
/// Split-orient input whose `index` has a different length than `data` fails
/// with a `JsonFormat` error mentioning the index length.
#[test]
fn json_split_index_length_mismatch_errors() {
    let source = r#"{"columns":["x"],"index":[0],"data":[[1],[2]]}"#;
    let outcome = read_json_str(source, JsonOrient::Split);

    let err = outcome.expect_err("split orient index/data length mismatch should error");
    assert!(
        matches!(&err, IoError::JsonFormat(msg) if msg.contains("index length")),
        "expected split index length error, got {err:?}"
    );
}
15606
/// Split-orient input with a data row shorter than `columns` fails with a
/// `JsonFormat` error that names row 0.
#[test]
fn json_split_row_length_mismatch_errors() {
    let source = r#"{"columns":["x","y"],"data":[[1],[2,3]]}"#;
    let outcome = read_json_str(source, JsonOrient::Split);

    let err = outcome.expect_err("split orient row length mismatch should error");
    assert!(
        matches!(&err, IoError::JsonFormat(msg) if msg.contains("row 0 length")),
        "expected split row length error, got {err:?}"
    );
}
15617
/// Non-string entries in split-orient `columns` (number, boolean, null) become
/// column names via their JSON text.
#[test]
fn json_split_non_string_columns_are_stringified() {
    let source = r#"{"columns":[1,true,null,"name"],"data":[[10,20,30,40]]}"#;
    let parsed = read_json_str(source, JsonOrient::Split).expect("read json split");

    // (stringified column name, expected value in the single data row)
    for (column, expected) in [("1", 10), ("true", 20), ("null", 30), ("name", 40)] {
        assert_eq!(
            parsed.column(column).unwrap().values()[0],
            Scalar::Int64(expected)
        );
    }
}
15627
15628 #[test]
15629 fn json_split_duplicate_column_names_error() {
15630 let input = r#"{"columns":[1,"1"],"data":[[10,20]]}"#;
15631 let err = read_json_str(input, JsonOrient::Split).expect_err("dup columns");
15632 assert!(matches!(err, IoError::DuplicateColumnName(name) if name == "1"));
15633 }
15634
15635 #[test]
15636 fn json_index_read_write_roundtrip() {
15637 let input = r#"{"row_a":{"name":"Alice","age":30},"row_b":{"name":"Bob","age":25}}"#;
15638 let frame = read_json_str(input, JsonOrient::Index).expect("read json index");
15639 assert_eq!(frame.index().len(), 2);
15640 assert_eq!(frame.index().labels()[0], IndexLabel::Utf8("row_a".into()));
15641 assert_eq!(
15642 frame.column("name").unwrap().values()[1],
15643 Scalar::Utf8("Bob".into())
15644 );
15645
15646 let output = write_json_string(&frame, JsonOrient::Index).expect("write");
15647 let frame2 = read_json_str(&output, JsonOrient::Index).expect("re-read");
15648 assert_eq!(frame2.index().labels(), frame.index().labels());
15649 assert_eq!(frame2.column("age").unwrap().values()[0], Scalar::Int64(30));
15650 }
15651
15652 #[test]
15653 fn json_index_preserves_column_order() {
15654 let input = r#"{"r1":{"b":1,"a":2},"r2":{"c":3}}"#;
15655 let frame = read_json_str(input, JsonOrient::Index).expect("parse");
15656 let order: Vec<&str> = frame
15657 .column_names()
15658 .iter()
15659 .map(|name| name.as_str())
15660 .collect();
15661 assert_eq!(order, vec!["b", "a", "c"]);
15662 }
15663
15664 #[test]
15665 fn json_index_missing_columns_null_fill() {
15666 let input = r#"{"r1":{"a":1},"r2":{"b":2}}"#;
15667 let frame = read_json_str(input, JsonOrient::Index).expect("parse");
15668 let a = frame.column("a").expect("a");
15669 let b = frame.column("b").expect("b");
15670
15671 assert_eq!(a.values()[0], Scalar::Float64(1.0));
15672 assert!(a.values()[1].is_missing());
15673 assert!(b.values()[0].is_missing());
15674 assert_eq!(b.values()[1], Scalar::Float64(2.0));
15675 }
15676
15677 #[test]
15678 fn json_index_write_duplicate_index_rejects() {
15679 let index = Index::new(vec![IndexLabel::Int64(1), IndexLabel::Utf8("1".into())]);
15680 let mut columns = BTreeMap::new();
15681 columns.insert(
15682 "v".into(),
15683 Column::from_values(vec![Scalar::Int64(10), Scalar::Int64(20)]).expect("col"),
15684 );
15685 let frame = DataFrame::new(index, columns).expect("frame");
15686
15687 let err = write_json_string(&frame, JsonOrient::Index)
15688 .expect_err("duplicate JSON object keys should reject");
15689 assert!(
15690 matches!(&err, IoError::JsonFormat(msg) if msg.contains("duplicate index label key")),
15691 "expected duplicate-index-key JsonFormat, got {err:?}"
15692 );
15693 }
15694
15695 #[test]
15696 fn json_index_read_non_object_row_rejects() {
15697 let input = r#"{"r1":{"a":1},"r2":[1,2]}"#;
15698 let err = read_json_str(input, JsonOrient::Index)
15699 .expect_err("index orient rows must be JSON objects");
15700 assert!(
15701 matches!(&err, IoError::JsonFormat(msg) if msg.contains("rows must be objects")),
15702 "expected row-object error, got {err:?}"
15703 );
15704 }
15705
15706 #[test]
15707 fn json_values_read_write_roundtrip() {
15708 let input = r#"[[1,"Alice"],[null,"Bob"]]"#;
15709 let frame = read_json_str(input, JsonOrient::Values).expect("read json values");
15710 assert_eq!(frame.index().len(), 2);
15711 assert_eq!(frame.column_names(), vec!["0", "1"]);
15712 assert_eq!(frame.column("0").unwrap().values()[0], Scalar::Float64(1.0));
15713 assert_eq!(
15714 frame.column("1").unwrap().values()[1],
15715 Scalar::Utf8("Bob".into())
15716 );
15717
15718 let output = write_json_string(&frame, JsonOrient::Values).expect("write");
15719 let frame2 = read_json_str(&output, JsonOrient::Values).expect("re-read");
15720 assert_eq!(frame2.index().len(), 2);
15721 assert_eq!(frame2.column_names(), frame.column_names());
15722 assert_eq!(
15723 frame2.column("0").unwrap().values(),
15724 frame.column("0").unwrap().values()
15725 );
15726 assert_eq!(
15727 frame2.column("1").unwrap().values(),
15728 frame.column("1").unwrap().values()
15729 );
15730 }
15731
15732 #[test]
15733 fn json_records_with_nulls() {
15734 let input = r#"[{"a":1,"b":null},{"a":null,"b":"hello"}]"#;
15735 let frame = read_json_str(input, JsonOrient::Records).expect("parse");
15736 assert!(frame.column("a").unwrap().values()[1].is_missing());
15737 assert!(frame.column("b").unwrap().values()[0].is_missing());
15738 }
15739
15740 #[test]
15741 fn json_read_accepts_pandas_bare_nan_tokens() {
15742 let cases = [
15743 (JsonOrient::Records, r#"[{"a":NaN}]"#),
15744 (JsonOrient::Columns, r#"{"a":{"0":NaN}}"#),
15745 (
15746 JsonOrient::Split,
15747 r#"{"columns":["a"],"index":[0],"data":[[NaN]]}"#,
15748 ),
15749 (JsonOrient::Values, r#"[[NaN]]"#),
15750 ];
15751
15752 for (orient, input) in cases {
15753 let frame = read_json_str(input, orient).expect("parse bare NaN");
15754 let column_name = if orient == JsonOrient::Values {
15755 "0"
15756 } else {
15757 "a"
15758 };
15759 assert!(frame.column(column_name).unwrap().values()[0].is_missing());
15760 }
15761 }
15762
15763 #[test]
15764 fn json_records_write_preserves_nullable_int_column() {
15765 let frame = DataFrame::from_dict_with_index(
15767 vec![("a", vec![Scalar::Int64(1), Scalar::Null(NullKind::Null)])],
15768 vec!["row".into(), "row".into()],
15769 )
15770 .unwrap();
15771 let json = write_json_string(&frame, JsonOrient::Records).expect("write");
15772 let parsed: serde_json::Value = serde_json::from_str(&json).unwrap();
15773 assert_eq!(parsed, serde_json::json!([{"a": 1}, {"a": null}]));
15774 }
15775
15776 #[test]
15777 fn json_non_records_nullable_int_reads_promote_to_float() {
15778 let cases = [
15779 (JsonOrient::Columns, r#"{"a":{"0":1,"1":null}}"#),
15780 (JsonOrient::Index, r#"{"0":{"a":1},"1":{"a":null}}"#),
15781 (
15782 JsonOrient::Split,
15783 r#"{"columns":["a"],"index":[0,1],"data":[[1],[null]]}"#,
15784 ),
15785 (JsonOrient::Values, r#"[[1],[null]]"#),
15786 ];
15787
15788 for (orient, input) in cases {
15789 let frame = read_json_str(input, orient).expect("read json");
15790 let column_name = if orient == JsonOrient::Values {
15791 "0"
15792 } else {
15793 "a"
15794 };
15795 let values = frame.column(column_name).expect("column").values();
15796 assert_eq!(values[0], Scalar::Float64(1.0));
15797 assert!(matches!(values[1], Scalar::Null(NullKind::NaN)));
15798 }
15799 }
15800
15801 #[test]
15802 fn json_non_records_nullable_int_writes_preserve_int() {
15803 let frame = DataFrame::from_dict(
15805 &["a"],
15806 vec![("a", vec![Scalar::Int64(1), Scalar::Null(NullKind::Null)])],
15807 )
15808 .unwrap();
15809
15810 let columns_json: serde_json::Value =
15811 serde_json::from_str(&write_json_string(&frame, JsonOrient::Columns).unwrap()).unwrap();
15812 assert_eq!(columns_json, serde_json::json!({"a": {"0": 1, "1": null}}));
15813
15814 let index_json: serde_json::Value =
15815 serde_json::from_str(&write_json_string(&frame, JsonOrient::Index).unwrap()).unwrap();
15816 assert_eq!(
15817 index_json,
15818 serde_json::json!({"0": {"a": 1}, "1": {"a": null}})
15819 );
15820
15821 let split_json: serde_json::Value =
15822 serde_json::from_str(&write_json_string(&frame, JsonOrient::Split).unwrap()).unwrap();
15823 assert_eq!(
15824 split_json,
15825 serde_json::json!({"columns": ["a"], "index": [0, 1], "data": [[1], [null]]})
15826 );
15827
15828 let values_json: serde_json::Value =
15829 serde_json::from_str(&write_json_string(&frame, JsonOrient::Values).unwrap()).unwrap();
15830 assert_eq!(values_json, serde_json::json!([[1], [null]]));
15831 }
15832
15833 #[test]
15834 fn json_records_empty_array() {
15835 let input = r#"[]"#;
15836 let frame = read_json_str(input, JsonOrient::Records).expect("parse");
15837 assert_eq!(frame.index().len(), 0);
15838 }
15839
15840 #[test]
15841 fn json_records_mixed_numeric_coerces() {
15842 let input = r#"[{"v":1},{"v":2.5},{"v":true}]"#;
15843 let frame = read_json_str(input, JsonOrient::Records).expect("parse");
15844 assert_eq!(frame.column("v").unwrap().values()[0], Scalar::Float64(1.0));
15846 assert_eq!(frame.column("v").unwrap().values()[1], Scalar::Float64(2.5));
15847 assert_eq!(frame.column("v").unwrap().values()[2], Scalar::Float64(1.0));
15848 }
15849
15850 #[test]
15851 fn json_records_mixed_utf8_numeric_preserves_object_values() {
15852 let input = r#"[{"v":1},{"v":"text"}]"#;
15853 let frame = read_json_str(input, JsonOrient::Records).expect("parse");
15854 assert_eq!(
15855 frame.column("v").unwrap().values(),
15856 &[Scalar::Int64(1), Scalar::Utf8("text".into())]
15857 );
15858 }
15859
15860 #[test]
15861 fn file_csv_roundtrip() {
15862 let input = "a,b\n1,2\n3,4\n";
15863 let frame = read_csv_str(input).expect("parse");
15864
15865 let dir = std::env::temp_dir();
15866 let path = dir.join("fp_io_test_roundtrip.csv");
15867 super::write_csv(&frame, &path).expect("write file");
15868 let frame2 = super::read_csv(&path).expect("read file");
15869 assert_eq!(frame2.index().len(), 2);
15870 std::fs::remove_file(&path).ok();
15871 }
15872
15873 #[test]
15874 fn file_csv_with_options_path() {
15875 let input = "id\tval\na\tNA\nb\t2\n";
15877 let dir = std::env::temp_dir();
15878 let path = dir.join("fp_io_test_options.csv");
15879 std::fs::write(&path, input).expect("write fixture");
15880
15881 let options = CsvReadOptions {
15882 delimiter: b'\t',
15883 na_values: vec!["NA".into()],
15884 index_col: Some("id".into()),
15885 ..Default::default()
15886 };
15887
15888 let frame = super::read_csv_with_options_path(&path, &options).expect("read with options");
15889 assert_eq!(
15890 frame.index().labels()[0],
15891 fp_index::IndexLabel::Utf8("a".into())
15892 );
15893 assert!(frame.column("id").is_none());
15894 assert!(frame.column("val").unwrap().values()[0].is_missing());
15895 assert_eq!(frame.column("val").unwrap().values()[1], Scalar::Int64(2));
15896
15897 std::fs::remove_file(&path).ok();
15898 }
15899
15900 #[test]
15901 fn file_json_roundtrip() {
15902 let input = r#"[{"x":1},{"x":2}]"#;
15903 let frame = read_json_str(input, JsonOrient::Records).expect("parse");
15904
15905 let dir = std::env::temp_dir();
15906 let path = dir.join("fp_io_test_roundtrip.json");
15907 super::write_json(&frame, &path, JsonOrient::Records).expect("write file");
15908 let frame2 = super::read_json(&path, JsonOrient::Records).expect("read file");
15909 assert_eq!(frame2.index().len(), 2);
15910 std::fs::remove_file(&path).ok();
15911 }
15912
15913 #[test]
15916 fn read_table_str_parses_tab_separated_4pwr9() {
15917 let input = "a\tb\tc\n1\t2\t3\n4\t5\t6\n";
15918 let frame = super::read_table_str(input).expect("parse tsv");
15919 assert_eq!(frame.index().len(), 2);
15920 assert_eq!(frame.column("a").unwrap().values()[0], Scalar::Int64(1));
15921 assert_eq!(frame.column("c").unwrap().values()[1], Scalar::Int64(6));
15922 }
15923
15924 #[test]
15925 fn read_table_with_options_overrides_default_delimiter_4pwr9() {
15926 let input = "x\ty\n1\tNA\n2\t3\n";
15928 let opts = CsvReadOptions {
15929 na_values: vec!["NA".into()],
15930 ..Default::default()
15931 };
15932 let frame = super::read_table_with_options(input, &opts).expect("parse tsv with na");
15933 assert!(frame.column("y").unwrap().values()[0].is_missing());
15934 assert_eq!(frame.column("y").unwrap().values()[1], Scalar::Int64(3));
15935 }
15936
15937 #[test]
15938 fn read_table_with_options_honours_explicit_pipe_delimiter_4pwr9() {
15939 let input = "x|y\n1|2\n3|4\n";
15940 let opts = CsvReadOptions {
15941 delimiter: b'|',
15942 ..Default::default()
15943 };
15944 let frame = super::read_table_with_options(input, &opts).expect("parse pipe");
15945 assert_eq!(frame.column("x").unwrap().values()[0], Scalar::Int64(1));
15946 assert_eq!(frame.column("y").unwrap().values()[1], Scalar::Int64(4));
15947 }
15948
15949 #[test]
15952 fn read_fwf_str_with_colspecs_parses_aligned_records_23n8u() {
15953 let input = "name age active\nalice 30 true\nbob 25 false\n";
15954 let opts = super::FwfReadOptions {
15955 colspecs: Some(vec![(0, 8), (8, 14), (14, 20)]),
15956 true_values: vec!["true".into()],
15957 false_values: vec!["false".into()],
15958 ..Default::default()
15959 };
15960 let frame = super::read_fwf_str(input, &opts).expect("parse fwf");
15961 assert_eq!(frame.index().len(), 2);
15962 assert_eq!(
15963 frame.column("name").unwrap().values()[0],
15964 Scalar::Utf8("alice".into())
15965 );
15966 assert_eq!(frame.column("age").unwrap().values()[0], Scalar::Int64(30));
15967 assert_eq!(
15968 frame.column("active").unwrap().values()[0],
15969 Scalar::Bool(true)
15970 );
15971 }
15972
15973 #[test]
15974 fn read_fwf_str_with_widths_derives_colspecs_23n8u() {
15975 let input = "x y \n1 2 \n3 4 \n";
15976 let opts = super::FwfReadOptions {
15977 widths: Some(vec![3, 3]),
15978 ..Default::default()
15979 };
15980 let frame = super::read_fwf_str(input, &opts).expect("parse fwf widths");
15981 assert_eq!(frame.column("x").unwrap().values()[0], Scalar::Int64(1));
15982 assert_eq!(frame.column("y").unwrap().values()[1], Scalar::Int64(4));
15983 }
15984
15985 #[test]
15986 fn read_fwf_str_threads_na_handling_23n8u() {
15987 let input = "id val\nA NA \nB 7 \n";
15989 let opts = super::FwfReadOptions {
15990 colspecs: Some(vec![(0, 5), (5, 9)]),
15991 na_values: vec!["NA".into()],
15992 ..Default::default()
15993 };
15994 let frame = super::read_fwf_str(input, &opts).expect("parse fwf na");
15995 let col = frame.column("val").unwrap().values();
15996 assert!(col[0].is_missing());
15997 assert_eq!(col[1], Scalar::Int64(7));
15998 }
15999
16000 #[test]
16001 fn read_fwf_rejects_both_colspecs_and_widths_23n8u() {
16002 let opts = super::FwfReadOptions {
16003 colspecs: Some(vec![(0, 3)]),
16004 widths: Some(vec![3]),
16005 ..Default::default()
16006 };
16007 let err = super::read_fwf_str("x\n1\n", &opts).expect_err("must reject");
16008 assert!(
16009 matches!(&err, super::IoError::Fwf(message) if message.contains("only one of")),
16010 "unexpected error: {err:?}"
16011 );
16012 }
16013
16014 #[test]
16015 fn read_fwf_infers_colspecs_when_specs_are_omitted_htdmp() {
16016 let opts = super::FwfReadOptions::default();
16017 let frame = super::read_fwf_str("a b\n1 2\n3 4\n", &opts).expect("infer fwf specs");
16018 assert_eq!(frame.column("a").unwrap().values()[0], Scalar::Int64(1));
16019 assert_eq!(frame.column("b").unwrap().values()[1], Scalar::Int64(4));
16020 }
16021
16022 #[test]
16023 fn read_fwf_infers_aligned_wide_colspecs_htdmp() {
16024 let input = "name age active\nalice 30 true\nbob 25 false\n";
16025 let opts = super::FwfReadOptions {
16026 true_values: vec!["true".into()],
16027 false_values: vec!["false".into()],
16028 ..Default::default()
16029 };
16030 let frame = super::read_fwf_str(input, &opts).expect("infer aligned fwf specs");
16031 assert_eq!(
16032 frame.column("name").unwrap().values()[0],
16033 Scalar::Utf8("alice".into())
16034 );
16035 assert_eq!(frame.column("age").unwrap().values()[1], Scalar::Int64(25));
16036 assert_eq!(
16037 frame.column("active").unwrap().values()[1],
16038 Scalar::Bool(false)
16039 );
16040 }
16041
16042 #[test]
16043 fn read_fwf_infer_honors_skiprows_and_skipfooter_htdmp() {
16044 let input = "ignored wide banner\nx y\n1 2\nfooter text ignored\n";
16045 let opts = super::FwfReadOptions {
16046 skiprows: 1,
16047 skipfooter: 1,
16048 ..Default::default()
16049 };
16050 let frame = super::read_fwf_str(input, &opts).expect("infer after skipping");
16051 assert_eq!(frame.column("x").unwrap().values()[0], Scalar::Int64(1));
16052 assert_eq!(frame.column("y").unwrap().values()[0], Scalar::Int64(2));
16053 }
16054
16055 #[test]
16058 fn read_clipboard_rejects_with_deferred_marker_2yy4d() {
16059 let err = super::read_clipboard().expect_err("must reject");
16060 assert!(
16061 matches!(&err, super::IoError::Deferred(message)
16062 if message.contains("read_clipboard") && message.contains("headless")),
16063 "unexpected error: {err:?}"
16064 );
16065 }
16066
16067 #[test]
16068 fn read_gbq_rejects_with_deferred_marker_2yy4d() {
16069 let err = super::read_gbq("SELECT 1", Some("proj")).expect_err("must reject");
16070 assert!(
16071 matches!(&err, super::IoError::Deferred(message)
16072 if message.contains("read_gbq") && message.contains("BigQuery")),
16073 "unexpected error: {err:?}"
16074 );
16075 let no_project_err = super::read_gbq("SELECT 1", None).expect_err("must reject");
16076 assert!(matches!(no_project_err, super::IoError::Deferred(_)));
16077 }
16078
16079 #[test]
16080 fn dataframe_deferred_writer_surfaces_report_method_names_e6jrk() {
16081 use super::DataFrameIoExt;
16082
16083 let frame = make_test_dataframe();
16084 let clipboard_err = frame
16085 .to_clipboard()
16086 .expect_err("must reject clipboard writer");
16087 assert!(
16088 matches!(&clipboard_err, super::IoError::Deferred(message) if message.contains("to_clipboard") && message.contains("headless"))
16089 );
16090
16091 let gbq_err = frame
16092 .to_gbq("dataset.table", Some("project"))
16093 .expect_err("must reject BigQuery writer");
16094 assert!(
16095 matches!(&gbq_err, super::IoError::Deferred(message) if message.contains("to_gbq") && message.contains("BigQuery"))
16096 );
16097
16098 let no_project_err = frame
16099 .to_gbq("dataset.table", None)
16100 .expect_err("must reject BigQuery writer without project");
16101 assert!(matches!(no_project_err, super::IoError::Deferred(_)));
16102 }
16103
16104 #[test]
16105 fn series_clipboard_writer_rejects_with_deferred_marker() {
16106 use super::SeriesIoExt;
16107
16108 let source = Series::from_values(
16109 "sales",
16110 vec!["r1".into(), "r2".into()],
16111 vec![Scalar::Int64(10), Scalar::Int64(12)],
16112 )
16113 .expect("source series");
16114 let err = source
16115 .to_clipboard()
16116 .expect_err("must reject series clipboard writer");
16117 assert!(
16118 matches!(&err, super::IoError::Deferred(message) if message.contains("to_clipboard") && message.contains("headless"))
16119 );
16120 }
16121
16122 #[test]
16123 fn read_sas_rejects_with_deferred_marker_2yy4d() {
16124 let path = std::path::Path::new("/nonexistent.sas7bdat");
16125 let err = super::read_sas(path).expect_err("must reject");
16126 assert!(
16127 matches!(&err, super::IoError::Deferred(message)
16128 if message.contains("read_sas") && message.contains("sas7bdat")),
16129 "unexpected error: {err:?}"
16130 );
16131 }
16132
16133 #[test]
16134 fn read_spss_rejects_with_deferred_marker_2yy4d() {
16135 let path = std::path::Path::new("/nonexistent.sav");
16136 let err = super::read_spss(path).expect_err("must reject");
16137 assert!(
16138 matches!(&err, super::IoError::Deferred(message)
16139 if message.contains("read_spss") && message.contains(".sav")),
16140 "unexpected error: {err:?}"
16141 );
16142 }
16143
16144 #[test]
16145 fn read_fwf_path_reads_fixed_width_file_23n8u() {
16146 let input = "a b\n1 2\n3 4\n";
16147 let dir = std::env::temp_dir();
16148 let path = dir.join("fp_io_test_read_fwf_23n8u.txt");
16149 std::fs::write(&path, input).expect("write fixture");
16150
16151 let opts = super::FwfReadOptions {
16152 colspecs: Some(vec![(0, 4), (4, 5)]),
16153 ..Default::default()
16154 };
16155 let frame = super::read_fwf(&path, &opts).expect("read fwf path");
16156 assert_eq!(frame.index().len(), 2);
16157 assert_eq!(frame.column("a").unwrap().values()[1], Scalar::Int64(3));
16158 assert_eq!(frame.column("b").unwrap().values()[0], Scalar::Int64(2));
16159
16160 std::fs::remove_file(&path).ok();
16161 }
16162
16163 #[test]
16164 fn read_table_path_roundtrips_through_read_csv_path_4pwr9() {
16165 let input = "id\tval\na\t1\nb\t2\n";
16166 let dir = std::env::temp_dir();
16167 let path = dir.join("fp_io_test_read_table_4pwr9.tsv");
16168 std::fs::write(&path, input).expect("write fixture");
16169
16170 let frame = super::read_table(&path).expect("read tsv");
16171 assert_eq!(frame.index().len(), 2);
16172 assert_eq!(
16173 frame.column("id").unwrap().values()[0],
16174 Scalar::Utf8("a".into())
16175 );
16176 assert_eq!(frame.column("val").unwrap().values()[1], Scalar::Int64(2));
16177
16178 let opts = CsvReadOptions {
16179 index_col: Some("id".into()),
16180 ..Default::default()
16181 };
16182 let frame2 =
16183 super::read_table_with_options_path(&path, &opts).expect("read tsv with options");
16184 assert!(frame2.column("id").is_none());
16185 assert_eq!(
16186 frame2.index().labels()[0],
16187 fp_index::IndexLabel::Utf8("a".into())
16188 );
16189 assert_eq!(frame2.column("val").unwrap().values()[1], Scalar::Int64(2));
16190
16191 std::fs::remove_file(&path).ok();
16192 }
16193
16194 fn make_test_dataframe() -> DataFrame {
16197 use fp_types::DType;
16198
16199 let mut columns = BTreeMap::new();
16200 columns.insert(
16201 "ints".to_string(),
16202 Column::new(
16203 DType::Int64,
16204 vec![Scalar::Int64(10), Scalar::Int64(20), Scalar::Int64(30)],
16205 )
16206 .unwrap(),
16207 );
16208 columns.insert(
16209 "floats".to_string(),
16210 Column::new(
16211 DType::Float64,
16212 vec![
16213 Scalar::Float64(1.5),
16214 Scalar::Float64(2.5),
16215 Scalar::Float64(3.5),
16216 ],
16217 )
16218 .unwrap(),
16219 );
16220 columns.insert(
16221 "names".to_string(),
16222 Column::from_values(vec![
16223 Scalar::Utf8("alice".into()),
16224 Scalar::Utf8("bob".into()),
16225 Scalar::Utf8("carol".into()),
16226 ])
16227 .unwrap(),
16228 );
16229
16230 let labels = vec![
16231 IndexLabel::Int64(0),
16232 IndexLabel::Int64(1),
16233 IndexLabel::Int64(2),
16234 ];
16235 DataFrame::new_with_column_order(
16236 Index::new(labels),
16237 columns,
16238 vec![
16239 "ints".to_string(),
16240 "floats".to_string(),
16241 "names".to_string(),
16242 ],
16243 )
16244 .unwrap()
16245 }
16246
16247 #[test]
16248 fn dataframe_io_ext_pandas_named_aliases_cover_supported_writers() {
16249 use super::DataFrameIoExt;
16250
16251 let frame = make_test_dataframe();
16252 let csv = frame.to_csv_string().expect("csv string");
16253 assert_eq!(csv, super::write_csv_string(&frame).expect("free csv"));
16254 assert_eq!(
16255 frame.to_markdown_string().expect("markdown string"),
16256 write_markdown_string(&frame).expect("free markdown")
16257 );
16258 assert_eq!(
16259 frame.to_latex_string().expect("latex string"),
16260 write_latex_string(&frame).expect("free latex")
16261 );
16262 let dir = std::env::temp_dir();
16263 let stem = format!("fp_io_dataframe_io_ext_{}", std::process::id());
16264 let excel_path = dir.join(format!("{stem}.xlsx"));
16265 let feather_path = dir.join(format!("{stem}.feather"));
16266 let parquet_path = dir.join(format!("{stem}.parquet"));
16267
16268 frame.to_excel(&excel_path).expect("to_excel alias");
16269 frame.to_feather(&feather_path).expect("to_feather alias");
16270 frame.to_parquet(&parquet_path).expect("to_parquet alias");
16271
16272 assert!(
16273 std::fs::metadata(&excel_path)
16274 .expect("excel metadata")
16275 .len()
16276 > 0
16277 );
16278 assert_eq!(
16279 super::read_feather(&feather_path)
16280 .expect("read feather")
16281 .index()
16282 .len(),
16283 frame.index().len()
16284 );
16285 assert_eq!(
16286 super::read_parquet(&parquet_path)
16287 .expect("read parquet")
16288 .index()
16289 .len(),
16290 frame.index().len()
16291 );
16292
16293 std::fs::remove_file(&excel_path).ok();
16294 std::fs::remove_file(&feather_path).ok();
16295 std::fs::remove_file(&parquet_path).ok();
16296 }
16297
16298 #[test]
16299 fn dataframe_io_ext_rjs51_in_memory_methods_match_free_functions() {
16300 use super::DataFrameIoExt;
16301
16302 let frame = make_test_dataframe();
16303 let csv_options = CsvWriteOptions {
16304 delimiter: b';',
16305 na_rep: "<NA>".to_owned(),
16306 header: true,
16307 include_index: true,
16308 index_label: Some("row".to_owned()),
16309 };
16310 assert_eq!(
16311 frame
16312 .to_csv_string_with_options(&csv_options)
16313 .expect("csv options through extension"),
16314 write_csv_string_with_options(&frame, &csv_options).expect("csv options free fn")
16315 );
16316 assert_eq!(
16317 frame
16318 .to_json_string(JsonOrient::Split)
16319 .expect("json split through extension"),
16320 write_json_string(&frame, JsonOrient::Split).expect("json split free fn")
16321 );
16322 assert_eq!(
16323 frame.to_jsonl_string().expect("jsonl through extension"),
16324 write_jsonl_string(&frame).expect("jsonl free fn")
16325 );
16326 let html_options = HtmlWriteOptions {
16327 include_index: false,
16328 ..HtmlWriteOptions::default()
16329 };
16330 assert_eq!(
16331 frame
16332 .to_html_string_with_options(&html_options)
16333 .expect("html options through extension"),
16334 write_html_string_with_options(&frame, &html_options).expect("html options free fn")
16335 );
16336 let xml_options = XmlWriteOptions {
16337 include_index: false,
16338 root_name: "records".to_owned(),
16339 row_name: "record".to_owned(),
16340 index_label: None,
16341 };
16342 assert_eq!(
16343 frame
16344 .to_xml_string_with_options(&xml_options)
16345 .expect("xml options through extension"),
16346 write_xml_string_with_options(&frame, &xml_options).expect("xml options free fn")
16347 );
16348
16349 let parquet = frame
16350 .to_parquet_bytes()
16351 .expect("parquet bytes through extension");
16352 assert_eq!(
16353 read_parquet_bytes(&parquet)
16354 .expect("parquet roundtrip")
16355 .index()
16356 .len(),
16357 frame.index().len()
16358 );
16359 let orc = frame.to_orc_bytes().expect("orc bytes through extension");
16360 assert_eq!(
16361 read_orc_bytes(&orc).expect("orc roundtrip").index().len(),
16362 frame.index().len()
16363 );
16364 let feather = frame
16365 .to_feather_bytes()
16366 .expect("feather bytes through extension");
16367 assert_eq!(
16368 read_feather_bytes(&feather)
16369 .expect("feather roundtrip")
16370 .index()
16371 .len(),
16372 frame.index().len()
16373 );
16374 let excel = frame
16375 .to_excel_bytes()
16376 .expect("excel bytes through extension");
16377 assert_eq!(
16378 read_excel_bytes(&excel, &ExcelReadOptions::default())
16379 .expect("excel roundtrip")
16380 .index()
16381 .len(),
16382 frame.index().len()
16383 );
16384 }
16385
16386 fn make_row_multiindex_test_dataframe() -> DataFrame {
16387 let df = DataFrame::from_dict(
16388 &["region", "product", "year", "sales", "cost"],
16389 vec![
16390 (
16391 "region",
16392 vec![
16393 Scalar::Utf8("north".into()),
16394 Scalar::Utf8("north".into()),
16395 Scalar::Utf8("south".into()),
16396 ],
16397 ),
16398 (
16399 "product",
16400 vec![
16401 Scalar::Utf8("apple".into()),
16402 Scalar::Utf8("pear".into()),
16403 Scalar::Utf8("apple".into()),
16404 ],
16405 ),
16406 (
16407 "year",
16408 vec![
16409 Scalar::Int64(2023),
16410 Scalar::Int64(2024),
16411 Scalar::Int64(2023),
16412 ],
16413 ),
16414 (
16415 "sales",
16416 vec![Scalar::Int64(10), Scalar::Int64(20), Scalar::Int64(30)],
16417 ),
16418 (
16419 "cost",
16420 vec![Scalar::Int64(4), Scalar::Int64(7), Scalar::Int64(12)],
16421 ),
16422 ],
16423 )
16424 .unwrap();
16425 df.set_index_multi(&["region", "product", "year"], true, "|")
16426 .unwrap()
16427 }
16428
16429 #[test]
16430 fn parquet_bytes_roundtrip() {
16431 let frame = make_test_dataframe();
16432 let bytes = super::write_parquet_bytes(&frame).expect("write parquet");
16433 assert!(!bytes.is_empty());
16434
16435 let frame2 = super::read_parquet_bytes(&bytes).expect("read parquet");
16436 assert_eq!(frame2.index().len(), 3);
16437 assert_eq!(
16438 frame2
16439 .column_names()
16440 .iter()
16441 .map(|s| s.as_str())
16442 .collect::<Vec<_>>(),
16443 vec!["ints", "floats", "names"]
16444 );
16445
16446 let ints = frame2.column("ints").unwrap();
16448 assert_eq!(ints.values()[0], Scalar::Int64(10));
16449 assert_eq!(ints.values()[1], Scalar::Int64(20));
16450 assert_eq!(ints.values()[2], Scalar::Int64(30));
16451
16452 let floats = frame2.column("floats").unwrap();
16453 assert_eq!(floats.values()[0], Scalar::Float64(1.5));
16454 assert_eq!(floats.values()[1], Scalar::Float64(2.5));
16455 assert_eq!(floats.values()[2], Scalar::Float64(3.5));
16456
16457 let names = frame2.column("names").unwrap();
16458 assert_eq!(names.values()[0], Scalar::Utf8("alice".into()));
16459 assert_eq!(names.values()[1], Scalar::Utf8("bob".into()));
16460 assert_eq!(names.values()[2], Scalar::Utf8("carol".into()));
16461 }
16462
16463 #[test]
16464 fn parquet_row_multiindex_roundtrip_restores_logical_row_axis() {
16465 let frame = make_row_multiindex_test_dataframe();
16466 let bytes = super::write_parquet_bytes(&frame).expect("write parquet");
16467 let roundtrip = super::read_parquet_bytes(&bytes).expect("read parquet");
16468
16469 assert!(roundtrip.equals(&frame));
16470 assert!(roundtrip.column("__index_level_0__").is_none());
16471 assert_eq!(
16472 roundtrip
16473 .row_multiindex()
16474 .expect("row multiindex should be restored")
16475 .get_level_values(0)
16476 .unwrap()
16477 .labels(),
16478 frame
16479 .row_multiindex()
16480 .expect("source row multiindex")
16481 .get_level_values(0)
16482 .unwrap()
16483 .labels()
16484 );
16485 }
16486
16487 #[test]
16488 fn parquet_file_roundtrip() {
16489 let frame = make_test_dataframe();
16490 let dir = std::env::temp_dir();
16491 let path = dir.join("fp_io_test_parquet_roundtrip.parquet");
16492
16493 super::write_parquet(&frame, &path).expect("write parquet file");
16494 let frame2 = super::read_parquet(&path).expect("read parquet file");
16495 assert_eq!(frame2.index().len(), 3);
16496 assert_eq!(
16497 frame2.column("ints").unwrap().values()[0],
16498 Scalar::Int64(10)
16499 );
16500 std::fs::remove_file(&path).ok();
16501 }
16502
16503 #[test]
16504 fn parquet_with_nulls() {
16505 use fp_types::DType;
16506
16507 let mut columns = BTreeMap::new();
16508 columns.insert(
16509 "vals".to_string(),
16510 Column::new(
16511 DType::Float64,
16512 vec![
16513 Scalar::Float64(1.0),
16514 Scalar::Null(NullKind::NaN),
16515 Scalar::Float64(3.0),
16516 ],
16517 )
16518 .unwrap(),
16519 );
16520 columns.insert(
16521 "strs".to_string(),
16522 Column::from_values(vec![
16523 Scalar::Utf8("a".into()),
16524 Scalar::Null(NullKind::Null),
16525 Scalar::Utf8("c".into()),
16526 ])
16527 .unwrap(),
16528 );
16529
16530 let labels = vec![
16531 IndexLabel::Int64(0),
16532 IndexLabel::Int64(1),
16533 IndexLabel::Int64(2),
16534 ];
16535 let frame = DataFrame::new_with_column_order(
16536 Index::new(labels),
16537 columns,
16538 vec!["vals".to_string(), "strs".to_string()],
16539 )
16540 .unwrap();
16541
16542 let bytes = super::write_parquet_bytes(&frame).expect("write");
16543 let frame2 = super::read_parquet_bytes(&bytes).expect("read");
16544
16545 assert_eq!(
16546 frame2.column("vals").unwrap().values()[0],
16547 Scalar::Float64(1.0)
16548 );
16549 assert!(frame2.column("vals").unwrap().values()[1].is_missing());
16550 assert_eq!(
16551 frame2.column("vals").unwrap().values()[2],
16552 Scalar::Float64(3.0)
16553 );
16554
16555 assert_eq!(
16556 frame2.column("strs").unwrap().values()[0],
16557 Scalar::Utf8("a".into())
16558 );
16559 assert!(frame2.column("strs").unwrap().values()[1].is_missing());
16560 assert_eq!(
16561 frame2.column("strs").unwrap().values()[2],
16562 Scalar::Utf8("c".into())
16563 );
16564 }
16565
/// Round-trips a single boolean column through the in-memory Parquet writer
/// and reader and checks that each of the three flags comes back unchanged.
#[test]
fn parquet_bool_column() {
    use fp_types::DType;

    // Values expected at rows 0, 1 and 2 after the roundtrip.
    let expected = [Scalar::Bool(true), Scalar::Bool(false), Scalar::Bool(true)];

    let flag_column = Column::new(
        DType::Bool,
        vec![Scalar::Bool(true), Scalar::Bool(false), Scalar::Bool(true)],
    )
    .unwrap();
    let mut columns = BTreeMap::new();
    columns.insert("flags".to_string(), flag_column);

    let row_labels = vec![
        IndexLabel::Int64(0),
        IndexLabel::Int64(1),
        IndexLabel::Int64(2),
    ];
    let original = DataFrame::new_with_column_order(
        Index::new(row_labels),
        columns,
        vec!["flags".to_string()],
    )
    .unwrap();

    let encoded = super::write_parquet_bytes(&original).expect("write");
    let decoded = super::read_parquet_bytes(&encoded).expect("read");

    let flags = decoded.column("flags").unwrap();
    for (row, want) in expected.iter().enumerate() {
        assert_eq!(&flags.values()[row], want);
    }
}
16608
/// A frame with no index labels and no columns must be rejected by the
/// Parquet byte writer.
#[test]
fn parquet_empty_dataframe_errors() {
    let empty =
        DataFrame::new_with_column_order(Index::new(vec![]), BTreeMap::new(), vec![]).unwrap();

    assert!(super::write_parquet_bytes(&empty).is_err());
}
16620
/// Writes the shared test frame to ORC bytes, checks the `ORC` magic prefix,
/// and verifies row count, column order and one sample value per column
/// after reading the bytes back.
#[test]
fn orc_bytes_roundtrip_preserves_supported_columns() {
    let source = make_test_dataframe();
    let encoded = write_orc_bytes(&source).expect("write orc");
    assert!(encoded.starts_with(b"ORC"));

    let decoded = read_orc_bytes(&encoded).expect("read orc");
    assert_eq!(decoded.index().len(), 3);

    let column_order: Vec<&str> = decoded.column_names().iter().map(|s| s.as_str()).collect();
    assert_eq!(column_order, vec!["ints", "floats", "names"]);

    // One spot check per column, each at a different row.
    let ints = decoded.column("ints").unwrap();
    assert_eq!(ints.values()[0], Scalar::Int64(10));

    let floats = decoded.column("floats").unwrap();
    assert_eq!(floats.values()[1], Scalar::Float64(2.5));

    let names = decoded.column("names").unwrap();
    assert_eq!(names.values()[2], Scalar::Utf8("carol".into()));
}
16651
/// Exercises the path-based ORC entry points: the free `write_orc`/`read_orc`
/// functions, the `to_orc_file` extension method, and the `to_orc_bytes`
/// extension method. Each roundtrip must reproduce the source frame.
///
/// The scratch files are wrapped in a drop guard so they are deleted from the
/// temp directory whether the test passes or an assertion panics.
#[test]
fn orc_file_and_extension_aliases_roundtrip() {
    use super::DataFrameIoExt;

    /// Removes the wrapped path when dropped.
    struct ScratchFile(std::path::PathBuf);
    impl Drop for ScratchFile {
        fn drop(&mut self) {
            // Best effort: the file may be missing if the write itself failed.
            let _ = std::fs::remove_file(&self.0);
        }
    }

    let frame = make_test_dataframe();
    // Process id plus source line keeps the two names distinct from each
    // other and from other test processes sharing the temp directory.
    let free_path = ScratchFile(std::env::temp_dir().join(format!(
        "fp_io_orc_free_{}_{}.orc",
        std::process::id(),
        line!()
    )));
    let trait_path = ScratchFile(std::env::temp_dir().join(format!(
        "fp_io_orc_trait_{}_{}.orc",
        std::process::id(),
        line!()
    )));

    write_orc(&frame, &free_path.0).expect("write orc path");
    let free_roundtrip = read_orc(&free_path.0).expect("read orc path");
    assert!(free_roundtrip.equals(&frame));

    frame.to_orc_file(&trait_path.0).expect("trait orc path");
    let trait_roundtrip = read_orc(&trait_path.0).expect("read trait orc path");
    assert!(trait_roundtrip.equals(&frame));

    let bytes = frame.to_orc_bytes().expect("trait orc bytes");
    assert!(
        read_orc_bytes(&bytes)
            .expect("read trait orc bytes")
            .equals(&frame)
    );
}
16683
/// An ORC roundtrip of a frame with a row multi-index must yield an equal
/// frame, must not expose an `__index_level_0__` data column, and must
/// restore the first index level with the same labels as the source.
#[test]
fn orc_row_multiindex_roundtrip_restores_logical_row_axis() {
    let source = make_row_multiindex_test_dataframe();
    let encoded = write_orc_bytes(&source).expect("write orc");
    let decoded = read_orc_bytes(&encoded).expect("read orc");

    assert!(decoded.equals(&source));
    assert!(decoded.column("__index_level_0__").is_none());

    let restored_axis = decoded
        .row_multiindex()
        .expect("row multiindex should be restored");
    let restored_level = restored_axis.get_level_values(0).unwrap();

    let source_axis = source.row_multiindex().expect("source row multiindex");
    let source_level = source_axis.get_level_values(0).unwrap();

    assert_eq!(restored_level.labels(), source_level.labels());
}
16707
/// Bytes that are not an ORC file must surface as `IoError::Orc`.
#[test]
fn orc_reader_rejects_malformed_input() {
    let garbage = b"not an orc file";
    let failure = read_orc_bytes(garbage).expect_err("malformed orc should fail");
    assert!(matches!(failure, IoError::Orc(_)));
}
16713
/// Writing with a custom `sheet_name` must produce a workbook whose only
/// sheet carries that name when all sheets are read back.
#[test]
fn write_excel_with_options_custom_sheet_name_survives_round_trip() {
    let source = make_test_dataframe();
    let write_options = super::ExcelWriteOptions {
        sheet_name: "Results".to_string(),
        ..super::ExcelWriteOptions::default()
    };
    let workbook = super::write_excel_bytes_with_options(&source, &write_options).expect("write");

    let read_options = super::ExcelReadOptions::default();
    let by_name = super::read_excel_sheets_bytes(&workbook, None, &read_options).expect("read");

    assert_eq!(by_name.len(), 1);
    assert!(by_name.contains_key("Results"));
}
16733
/// With `index: false` the written sheet holds only the data columns, so a
/// default read sees exactly `ints`, `floats`, `names` in that order.
#[test]
fn write_excel_with_options_index_false_omits_index_column() {
    let source = make_test_dataframe();
    let write_options = super::ExcelWriteOptions {
        index: false,
        ..super::ExcelWriteOptions::default()
    };
    let workbook = super::write_excel_bytes_with_options(&source, &write_options).expect("write");

    let read_options = super::ExcelReadOptions::default();
    let decoded = super::read_excel_bytes(&workbook, &read_options).expect("read");

    let column_order: Vec<&str> = decoded.column_names().iter().map(|s| s.as_str()).collect();
    assert_eq!(column_order, vec!["ints", "floats", "names"]);
}
16755
/// An explicit `index_label` must become the header of the first column seen
/// by a default read.
#[test]
fn write_excel_with_options_index_label_overrides_header() {
    let source = make_test_dataframe();
    let write_options = super::ExcelWriteOptions {
        index_label: Some("row_id".to_string()),
        ..super::ExcelWriteOptions::default()
    };
    let workbook = super::write_excel_bytes_with_options(&source, &write_options).expect("write");

    let read_options = super::ExcelReadOptions::default();
    let decoded = super::read_excel_bytes(&workbook, &read_options).expect("read");

    let header_names = decoded.column_names();
    assert_eq!(header_names[0], "row_id");
}
16773
/// With `header: false` and `index: false` no header row is written, so a
/// default read must not produce a column called `ints`.
#[test]
fn write_excel_with_options_header_false_omits_header_row() {
    let source = make_test_dataframe();
    let write_options = super::ExcelWriteOptions {
        header: false,
        index: false,
        ..super::ExcelWriteOptions::default()
    };
    let workbook = super::write_excel_bytes_with_options(&source, &write_options).expect("write");

    let read_options = super::ExcelReadOptions::default();
    let decoded = super::read_excel_bytes(&workbook, &read_options).expect("read");

    let has_ints_header = decoded
        .column_names()
        .iter()
        .any(|name| name.as_str() == "ints");
    assert!(!has_ints_header);
}
16795
/// `write_excel_bytes` and `write_excel_bytes_with_options` with default
/// options must emit byte-identical workbooks.
#[test]
fn write_excel_with_options_default_matches_write_excel_bytes() {
    let source = make_test_dataframe();

    let plain = super::write_excel_bytes(&source).expect("default");

    let default_options = super::ExcelWriteOptions::default();
    let with_options =
        super::write_excel_bytes_with_options(&source, &default_options).expect("options");

    assert_eq!(plain, with_options);
}
16805
/// A frame with a row multi-index survives an Excel roundtrip when the
/// reader is told which columns (`region`, `product`, `year`) form the index.
#[test]
fn excel_multiindex_roundtrip_with_explicit_index_cols() {
    let source = make_row_multiindex_test_dataframe();

    let write_options = super::ExcelWriteOptions::default();
    let workbook = super::write_excel_bytes_with_options(&source, &write_options).expect("write");

    let read_options = super::ExcelReadOptions::default();
    let decoded = super::read_excel_bytes_with_index_cols(
        &workbook,
        &read_options,
        &["region", "product", "year"],
    )
    .expect("read");

    assert!(decoded.equals(&source));
    assert_eq!(decoded.row_multiindex(), source.row_multiindex());
}
16822
16823 fn build_two_sheet_workbook_bytes() -> Vec<u8> {
16824 use rust_xlsxwriter::Workbook;
16825 let mut workbook = Workbook::new();
16826 let sheet1 = workbook.add_worksheet();
16827 sheet1.set_name("Alpha").expect("sheet name");
16828 sheet1.write_string(0, 0, "a").expect("header");
16829 sheet1.write_string(0, 1, "b").expect("header");
16830 sheet1.write_number(1, 0, 1.0).expect("data");
16831 sheet1.write_number(1, 1, 10.0).expect("data");
16832 sheet1.write_number(2, 0, 2.0).expect("data");
16833 sheet1.write_number(2, 1, 20.0).expect("data");
16834
16835 let sheet2 = workbook.add_worksheet();
16836 sheet2.set_name("Bravo").expect("sheet name");
16837 sheet2.write_string(0, 0, "name").expect("header");
16838 sheet2.write_string(1, 0, "alice").expect("data");
16839 sheet2.write_string(2, 0, "bob").expect("data");
16840
16841 let sheet3 = workbook.add_worksheet();
16842 sheet3.set_name("Charlie").expect("sheet name");
16843 sheet3.write_string(0, 0, "x").expect("header");
16844 sheet3.write_number(1, 0, 99.0).expect("data");
16845
16846 workbook.save_to_buffer().expect("save")
16847 }
16848
/// The ordered reader must return sheets in workbook order (`Zulu`, `Alpha`,
/// `Mike`), while the map-returning reader yields the same sheets with keys
/// in alphabetical order.
#[test]
fn read_excel_sheets_ordered_bytes_preserves_workbook_order() {
    use rust_xlsxwriter::Workbook;

    // Sheet names are deliberately not alphabetical so the two orderings differ.
    let mut workbook = Workbook::new();
    for (sheet_name, value) in [("Zulu", 1.0), ("Alpha", 2.0), ("Mike", 3.0)] {
        let sheet = workbook.add_worksheet();
        sheet.set_name(sheet_name).expect("name");
        sheet.write_string(0, 0, "v").expect("header");
        sheet.write_number(1, 0, value).expect("data");
    }
    let bytes = workbook.save_to_buffer().expect("save");

    let read_options = super::ExcelReadOptions::default();
    let ordered =
        super::read_excel_sheets_ordered_bytes(&bytes, None, &read_options).expect("read ordered");
    let ordered_names = ordered.iter().map(|(k, _)| k.as_str()).collect::<Vec<_>>();
    assert_eq!(
        ordered_names,
        vec!["Zulu", "Alpha", "Mike"],
        "ordered form preserves workbook order"
    );

    let sorted = super::read_excel_sheets_bytes(&bytes, None, &super::ExcelReadOptions::default())
        .expect("read sorted");
    let sorted_names = sorted.keys().map(String::as_str).collect::<Vec<_>>();
    assert_eq!(
        sorted_names,
        vec!["Alpha", "Mike", "Zulu"],
        "BTreeMap form alphabetizes"
    );
}
16894
/// When a subset of sheets is requested, the ordered reader must return them
/// in the order the caller listed them (`Charlie` before `Alpha`).
#[test]
fn read_excel_sheets_ordered_bytes_selected_subset_keeps_caller_order() {
    let workbook = build_two_sheet_workbook_bytes();
    let requested = vec!["Charlie".to_string(), "Alpha".to_string()];
    let read_options = super::ExcelReadOptions::default();

    let ordered = super::read_excel_sheets_ordered_bytes(&workbook, Some(&requested), &read_options)
        .expect("ordered subset");

    let returned_names = ordered.iter().map(|(k, _)| k.as_str()).collect::<Vec<_>>();
    assert_eq!(returned_names, vec!["Charlie", "Alpha"]);
}
16913
/// The path-based ordered reader must return the same sheet names, in the
/// same order, as the byte-based ordered reader for the same workbook.
///
/// The scratch workbook uses a per-process file name and is wrapped in a drop
/// guard, so concurrent test processes do not share a file and nothing is
/// left in the temp directory, even if an assertion panics.
#[test]
fn read_excel_sheets_ordered_path_matches_bytes() {
    /// Removes the wrapped path when dropped.
    struct ScratchFile(std::path::PathBuf);
    impl Drop for ScratchFile {
        fn drop(&mut self) {
            // Best effort: the file may be missing if the write itself failed.
            let _ = std::fs::remove_file(&self.0);
        }
    }

    let bytes = build_two_sheet_workbook_bytes();
    let temp = ScratchFile(std::env::temp_dir().join(format!(
        "fp_io_wrt3_ordered_{}.xlsx",
        std::process::id()
    )));
    std::fs::write(&temp.0, &bytes).expect("write temp");

    let via_path =
        super::read_excel_sheets_ordered(&temp.0, None, &super::ExcelReadOptions::default())
            .expect("read path");
    let via_bytes = super::read_excel_sheets_ordered_bytes(
        &bytes,
        None,
        &super::ExcelReadOptions::default(),
    )
    .expect("read bytes");

    assert_eq!(
        via_path.iter().map(|(k, _)| k.clone()).collect::<Vec<_>>(),
        via_bytes.iter().map(|(k, _)| k.clone()).collect::<Vec<_>>()
    );
}
16933
/// Reading with no sheet selection returns all three fixture sheets keyed by
/// name; spot-checks the shape of `Alpha` and the first value of `Bravo`.
#[test]
fn read_excel_sheets_bytes_all_sheets_returns_map() {
    let workbook = build_two_sheet_workbook_bytes();
    let read_options = super::ExcelReadOptions::default();
    let by_name =
        super::read_excel_sheets_bytes(&workbook, None, &read_options).expect("read sheets");

    assert_eq!(by_name.len(), 3);
    for sheet_name in ["Alpha", "Bravo", "Charlie"] {
        assert!(by_name.contains_key(sheet_name));
    }

    let alpha = &by_name["Alpha"];
    assert_eq!(alpha.index().len(), 2);
    assert_eq!(alpha.column_names().len(), 2);

    let bravo = &by_name["Bravo"];
    assert_eq!(bravo.index().len(), 2);
    let first_name = &bravo.column("name").unwrap().values()[0];
    assert_eq!(first_name, &Scalar::Utf8("alice".into()));
}
16956
/// Requesting `Alpha` and `Charlie` must return exactly those two sheets and
/// leave `Bravo` out.
#[test]
fn read_excel_sheets_bytes_selects_subset() {
    let workbook = build_two_sheet_workbook_bytes();
    let wanted = vec!["Alpha".to_string(), "Charlie".to_string()];
    let read_options = super::ExcelReadOptions::default();

    let by_name = super::read_excel_sheets_bytes(&workbook, Some(&wanted), &read_options)
        .expect("read subset");

    assert_eq!(by_name.len(), 2);
    assert!(by_name.contains_key("Alpha"));
    assert!(by_name.contains_key("Charlie"));
    assert!(!by_name.contains_key("Bravo"));
}
16972
/// Requesting a sheet name that is not in the workbook must fail with
/// `IoError::Excel`.
#[test]
fn read_excel_sheets_bytes_unknown_sheet_errors() {
    let workbook = build_two_sheet_workbook_bytes();
    let unknown = vec!["Zeta".to_string()];
    let read_options = super::ExcelReadOptions::default();

    let failure =
        super::read_excel_sheets_bytes(&workbook, Some(&unknown), &read_options).unwrap_err();

    assert!(matches!(failure, super::IoError::Excel(_)));
}
16985
/// The path-based multi-sheet reader must return the same sheet keys as the
/// byte-based reader for the same workbook.
///
/// The scratch workbook uses a per-process file name and is wrapped in a drop
/// guard, so concurrent test processes do not share a file and nothing is
/// left in the temp directory, even if an assertion panics.
#[test]
fn read_excel_sheets_path_matches_bytes() {
    /// Removes the wrapped path when dropped.
    struct ScratchFile(std::path::PathBuf);
    impl Drop for ScratchFile {
        fn drop(&mut self) {
            // Best effort: the file may be missing if the write itself failed.
            let _ = std::fs::remove_file(&self.0);
        }
    }

    let bytes = build_two_sheet_workbook_bytes();
    let temp = ScratchFile(std::env::temp_dir().join(format!(
        "fp_io_9my2_multisheet_{}.xlsx",
        std::process::id()
    )));
    std::fs::write(&temp.0, &bytes).expect("write temp");

    let via_path = super::read_excel_sheets(&temp.0, None, &super::ExcelReadOptions::default())
        .expect("read path");
    let via_bytes =
        super::read_excel_sheets_bytes(&bytes, None, &super::ExcelReadOptions::default())
            .expect("read bytes");

    assert_eq!(
        via_path.keys().collect::<Vec<_>>(),
        via_bytes.keys().collect::<Vec<_>>()
    );
}
17001
/// Writes the shared test frame to xlsx bytes and reads it back with
/// `column_0` promoted to the index, then checks index labels, the unnamed
/// index, column order and every cell of the three data columns.
#[test]
fn excel_bytes_roundtrip() {
    let source = make_test_dataframe();
    let workbook = super::write_excel_bytes(&source).expect("write excel");
    assert!(!workbook.is_empty());

    let read_options = super::ExcelReadOptions {
        index_col: Some("column_0".into()),
        ..Default::default()
    };
    let decoded = super::read_excel_bytes(&workbook, &read_options).expect("read excel");

    assert_eq!(decoded.index().len(), 3);
    assert_eq!(decoded.index().labels(), source.index().labels());
    assert_eq!(decoded.index().name(), None);

    let column_order: Vec<&str> = decoded.column_names().iter().map(|s| s.as_str()).collect();
    assert_eq!(column_order, vec!["ints", "floats", "names"]);

    let expected_ints = [Scalar::Int64(10), Scalar::Int64(20), Scalar::Int64(30)];
    let ints = decoded.column("ints").unwrap();
    for (row, want) in expected_ints.iter().enumerate() {
        assert_eq!(&ints.values()[row], want);
    }

    let expected_floats = [
        Scalar::Float64(1.5),
        Scalar::Float64(2.5),
        Scalar::Float64(3.5),
    ];
    let floats = decoded.column("floats").unwrap();
    for (row, want) in expected_floats.iter().enumerate() {
        assert_eq!(&floats.values()[row], want);
    }

    let expected_names = [
        Scalar::Utf8("alice".into()),
        Scalar::Utf8("bob".into()),
        Scalar::Utf8("carol".into()),
    ];
    let names = decoded.column("names").unwrap();
    for (row, want) in expected_names.iter().enumerate() {
        assert_eq!(&names.values()[row], want);
    }
}
17047
/// Writes the shared test frame to an xlsx file on disk and reads it back
/// with `column_0` promoted to the index.
///
/// The scratch file uses a per-process name so concurrent test processes do
/// not overwrite each other, and a drop guard removes it even when an
/// assertion or `expect` panics.
#[test]
fn excel_file_roundtrip() {
    /// Removes the wrapped path when dropped.
    struct ScratchFile(std::path::PathBuf);
    impl Drop for ScratchFile {
        fn drop(&mut self) {
            // Best effort: the file may be missing if the write itself failed.
            let _ = std::fs::remove_file(&self.0);
        }
    }

    let frame = make_test_dataframe();
    let path = ScratchFile(std::env::temp_dir().join(format!(
        "fp_io_test_excel_roundtrip_{}.xlsx",
        std::process::id()
    )));

    super::write_excel(&frame, &path.0).expect("write excel file");
    let frame2 = super::read_excel(
        &path.0,
        &super::ExcelReadOptions {
            index_col: Some("column_0".into()),
            ..Default::default()
        },
    )
    .expect("read excel file");

    assert_eq!(frame2.index().len(), 3);
    assert_eq!(frame2.index().labels(), frame.index().labels());
    assert_eq!(
        frame2.column("ints").unwrap().values()[0],
        Scalar::Int64(10)
    );
}
17071
/// Round-trips a float column containing a NaN null through xlsx bytes.
///
/// The middle cell must read back as missing. The outer cells are asserted
/// equal to `Scalar::Int64(1)` / `Scalar::Int64(3)` even though they were
/// written as `Float64(1.0)` / `Float64(3.0)`.
// NOTE(review): presumably the reader infers integers for whole-number cells;
// confirm against the reader's type inference.
#[test]
fn excel_with_nulls() {
    use fp_types::DType;

    let float_column = Column::new(
        DType::Float64,
        vec![
            Scalar::Float64(1.0),
            Scalar::Null(NullKind::NaN),
            Scalar::Float64(3.0),
        ],
    )
    .unwrap();
    let mut columns = BTreeMap::new();
    columns.insert("vals".to_string(), float_column);

    let row_labels = vec![
        IndexLabel::Int64(0),
        IndexLabel::Int64(1),
        IndexLabel::Int64(2),
    ];
    let source = DataFrame::new_with_column_order(
        Index::new(row_labels),
        columns,
        vec!["vals".to_string()],
    )
    .unwrap();

    let workbook = super::write_excel_bytes(&source).expect("write");
    let read_options = super::ExcelReadOptions {
        index_col: Some("column_0".into()),
        ..Default::default()
    };
    let decoded = super::read_excel_bytes(&workbook, &read_options).expect("read");

    let vals = decoded.column("vals").unwrap();
    assert_eq!(vals.values()[0], Scalar::Int64(1));
    assert!(vals.values()[1].is_missing());
    assert_eq!(vals.values()[2], Scalar::Int64(3));
}
17115
/// Round-trips a boolean column through xlsx bytes (reading `column_0` back
/// as the index) and checks that all three flags are preserved.
#[test]
fn excel_bool_column() {
    use fp_types::DType;

    // Values expected at rows 0, 1 and 2 after the roundtrip.
    let expected = [Scalar::Bool(true), Scalar::Bool(false), Scalar::Bool(true)];

    let flag_column = Column::new(
        DType::Bool,
        vec![Scalar::Bool(true), Scalar::Bool(false), Scalar::Bool(true)],
    )
    .unwrap();
    let mut columns = BTreeMap::new();
    columns.insert("flags".to_string(), flag_column);

    let row_labels = vec![
        IndexLabel::Int64(0),
        IndexLabel::Int64(1),
        IndexLabel::Int64(2),
    ];
    let source = DataFrame::new_with_column_order(
        Index::new(row_labels),
        columns,
        vec!["flags".to_string()],
    )
    .unwrap();

    let workbook = super::write_excel_bytes(&source).expect("write");
    let read_options = super::ExcelReadOptions {
        index_col: Some("column_0".into()),
        ..Default::default()
    };
    let decoded = super::read_excel_bytes(&workbook, &read_options).expect("read");

    let flags = decoded.column("flags").unwrap();
    for (row, want) in expected.iter().enumerate() {
        assert_eq!(&flags.values()[row], want);
    }
}
17165
/// Reads a written two-row workbook with `skip_rows: 1` and
/// `has_headers: false`, expecting two rows and a generated `column_0`
/// column in the result.
#[test]
fn excel_skip_rows() {
    use fp_types::DType;

    let x_column = Column::new(DType::Int64, vec![Scalar::Int64(1), Scalar::Int64(2)]).unwrap();
    let mut columns = BTreeMap::new();
    columns.insert("x".to_string(), x_column);

    let row_labels = vec![IndexLabel::Int64(0), IndexLabel::Int64(1)];
    let source =
        DataFrame::new_with_column_order(Index::new(row_labels), columns, vec!["x".to_string()])
            .unwrap();

    let workbook = super::write_excel_bytes(&source).expect("write");
    let read_options = super::ExcelReadOptions {
        skip_rows: 1,
        has_headers: false,
        ..Default::default()
    };
    let decoded = super::read_excel_bytes(&workbook, &read_options).expect("read with skip");

    assert_eq!(decoded.index().len(), 2);
    assert!(decoded.column("column_0").is_some());
}
17198
/// With `has_headers: false` and explicit `names`, the supplied names become
/// the column names and the first sheet row is kept as data.
#[test]
fn excel_header_none_with_explicit_names_uses_names_and_keeps_first_row() {
    let text = |s: &str| calamine::Data::String(s.to_owned());
    let sheet_rows = vec![
        vec![calamine::Data::Int(1), text("alpha")],
        vec![calamine::Data::Int(2), text("beta")],
    ];
    let read_options = super::ExcelReadOptions {
        has_headers: false,
        names: Some(vec!["id".to_owned(), "label".to_owned()]),
        ..Default::default()
    };

    let parsed = super::parse_excel_rows(sheet_rows, &read_options)
        .expect("parse excel rows with explicit names");

    assert_eq!(parsed.column_names(), vec!["id", "label"]);
    assert_eq!(parsed.index().len(), 2);

    let ids = parsed.column("id").unwrap();
    assert_eq!(ids.values()[0], Scalar::Int64(1));
    assert_eq!(
        parsed.column("label").unwrap().values()[0],
        Scalar::Utf8("alpha".into())
    );
    assert_eq!(ids.values()[1], Scalar::Int64(2));
}
17231
/// With headerless rows and explicit `names`, choosing one of those names as
/// `index_col` moves that column into the index, keeps its name on the index,
/// and removes it from the data columns.
#[test]
fn excel_header_none_with_explicit_names_preserves_index_name() {
    let text = |s: &str| calamine::Data::String(s.to_owned());
    let sheet_rows = vec![
        vec![calamine::Data::Int(10), text("alpha")],
        vec![calamine::Data::Int(20), text("beta")],
    ];
    let read_options = super::ExcelReadOptions {
        has_headers: false,
        names: Some(vec!["row_id".to_owned(), "value".to_owned()]),
        index_col: Some("row_id".to_owned()),
        ..Default::default()
    };

    let parsed = super::parse_excel_rows(sheet_rows, &read_options)
        .expect("parse excel rows with named index column");

    let row_axis = parsed.index();
    assert_eq!(row_axis.name(), Some("row_id"));
    assert_eq!(row_axis.labels()[0], IndexLabel::Int64(10));
    assert_eq!(row_axis.labels()[1], IndexLabel::Int64(20));

    assert!(parsed.column("row_id").is_none());
    assert_eq!(
        parsed.column("value").unwrap().values(),
        &[Scalar::Utf8("alpha".into()), Scalar::Utf8("beta".into())]
    );
}
17265
/// Supplying one name for a two-column headerless sheet must fail with an
/// `IoError::Excel` whose message reports the expected and actual counts.
#[test]
fn excel_explicit_names_width_mismatch_errors() {
    let sheet_rows = vec![vec![calamine::Data::Int(1), calamine::Data::Int(2)]];
    let read_options = super::ExcelReadOptions {
        has_headers: false,
        names: Some(vec!["only_one".to_owned()]),
        ..Default::default()
    };

    let failure = super::parse_excel_rows(sheet_rows, &read_options)
        .expect_err("names width mismatch should error");

    assert!(
        matches!(failure, IoError::Excel(detail) if detail.contains("expected 2 column names, got 1"))
    );
}
17284
/// `usecols` given as `["c", "a"]` must keep only those columns and return
/// them in sheet order (`a` before `c`), dropping `b`.
#[test]
fn excel_usecols_selects_subset_in_sheet_order() {
    let text = |s: &str| calamine::Data::String(s.to_owned());
    let sheet_rows = vec![
        vec![text("a"), text("b"), text("c")],
        vec![
            calamine::Data::Int(1),
            calamine::Data::Int(2),
            calamine::Data::Int(3),
        ],
    ];
    let read_options = super::ExcelReadOptions {
        usecols: Some(vec!["c".to_owned(), "a".to_owned()]),
        ..Default::default()
    };

    let parsed = super::parse_excel_rows(sheet_rows, &read_options)
        .expect("parse excel rows with usecols");

    assert_eq!(parsed.column_names(), vec!["a", "c"]);
    assert_eq!(parsed.column("a").unwrap().values(), &[Scalar::Int64(1)]);
    assert_eq!(parsed.column("c").unwrap().values(), &[Scalar::Int64(3)]);
    assert!(parsed.column("b").is_none());
}
17314
/// For headerless rows with explicit `names`, `usecols` is matched against
/// the supplied names: selecting `label` keeps that column and drops `id`.
#[test]
fn excel_usecols_with_explicit_names_filters_renamed_columns() {
    let text = |s: &str| calamine::Data::String(s.to_owned());
    let sheet_rows = vec![
        vec![calamine::Data::Int(1), text("alpha")],
        vec![calamine::Data::Int(2), text("beta")],
    ];
    let read_options = super::ExcelReadOptions {
        has_headers: false,
        names: Some(vec!["id".to_owned(), "label".to_owned()]),
        usecols: Some(vec!["label".to_owned()]),
        ..Default::default()
    };

    let parsed = super::parse_excel_rows(sheet_rows, &read_options)
        .expect("parse headerless excel rows with names and usecols");

    assert_eq!(parsed.column_names(), vec!["label"]);
    let labels = parsed.column("label").unwrap();
    assert_eq!(
        labels.values(),
        &[Scalar::Utf8("alpha".into()), Scalar::Utf8("beta".into())]
    );
    assert!(parsed.column("id").is_none());
}
17346
/// Asking `usecols` for a column the sheet does not have must fail with
/// `IoError::MissingUsecols` listing the missing name.
#[test]
fn excel_usecols_missing_column_errors() {
    let text = |s: &str| calamine::Data::String(s.to_owned());
    let sheet_rows = vec![
        vec![text("a"), text("b")],
        vec![calamine::Data::Int(1), calamine::Data::Int(2)],
    ];
    let read_options = super::ExcelReadOptions {
        usecols: Some(vec!["missing".to_owned()]),
        ..Default::default()
    };

    let failure = super::parse_excel_rows(sheet_rows, &read_options)
        .expect_err("missing excel usecols should error");

    assert!(
        matches!(failure, IoError::MissingUsecols(absent) if absent == vec!["missing".to_owned()])
    );
}
17370
/// A default read of a workbook produced by the default writer must restore
/// the source index labels with no index name, keep only the three data
/// columns, and leave no `column_0` column behind.
#[test]
fn excel_default_read_promotes_writer_range_index_back_to_index() {
    let source = make_test_dataframe();
    let workbook = super::write_excel_bytes(&source).expect("write excel");

    let read_options = super::ExcelReadOptions::default();
    let decoded = super::read_excel_bytes(&workbook, &read_options).expect("read excel");

    let row_axis = decoded.index();
    assert_eq!(row_axis.labels(), source.index().labels());
    assert_eq!(row_axis.name(), None);
    assert_eq!(decoded.column_names(), vec!["ints", "floats", "names"],);
    assert!(decoded.column("column_0").is_none());
}
17384
/// When the leading column has an empty header but its values (10, 20) are
/// not a 0-based range, a default parse keeps it as the data column
/// `column_0` and the frame gets the labels 0 and 1 as its index.
#[test]
fn excel_default_read_keeps_non_range_generated_leading_column_as_data() {
    let header_row = vec![
        calamine::Data::Empty,
        calamine::Data::String("value".to_owned()),
    ];
    let sheet_rows = vec![
        header_row,
        vec![calamine::Data::Int(10), calamine::Data::Int(1)],
        vec![calamine::Data::Int(20), calamine::Data::Int(2)],
    ];

    let read_options = super::ExcelReadOptions::default();
    let parsed = super::parse_excel_rows(sheet_rows, &read_options).expect("parse excel rows");

    assert_eq!(
        parsed.index().labels(),
        &[IndexLabel::Int64(0), IndexLabel::Int64(1)]
    );
    assert_eq!(parsed.column_names(), vec!["column_0", "value"]);

    let leading = parsed.column("column_0").unwrap();
    assert_eq!(leading.values(), &[Scalar::Int64(10), Scalar::Int64(20)],);
}
17409
/// A frame whose index is named `row_id` must keep that name, its labels and
/// its data column when written to xlsx and read back with
/// `index_col: "row_id"`.
#[test]
fn excel_named_index_roundtrip_preserves_index_name() {
    use fp_types::DType;

    let vals_column =
        Column::new(DType::Int64, vec![Scalar::Int64(10), Scalar::Int64(20)]).unwrap();
    let mut columns = BTreeMap::new();
    columns.insert("vals".to_string(), vals_column);

    let named_index =
        Index::new(vec![IndexLabel::Int64(10), IndexLabel::Int64(20)]).set_name("row_id");
    let source =
        DataFrame::new_with_column_order(named_index, columns, vec!["vals".to_string()]).unwrap();

    let workbook = super::write_excel_bytes(&source).expect("write excel");
    let read_options = super::ExcelReadOptions {
        index_col: Some("row_id".into()),
        ..Default::default()
    };
    let decoded = super::read_excel_bytes(&workbook, &read_options).expect("read excel");

    assert_eq!(decoded.index().labels(), source.index().labels());
    assert_eq!(decoded.index().name(), Some("row_id"));
    assert!(decoded.column("row_id").is_none());

    let vals = decoded.column("vals").unwrap();
    assert_eq!(vals.values(), &[Scalar::Int64(10), Scalar::Int64(20)]);
}
17445
/// Two header cells with the same text must be rejected with
/// `IoError::DuplicateColumnName`.
#[test]
fn excel_duplicate_headers_error() {
    let text = |s: &str| calamine::Data::String(s.to_owned());
    let sheet_rows = vec![
        vec![text("dup"), text("dup")],
        vec![calamine::Data::Int(1), calamine::Data::Int(2)],
    ];

    let read_options = super::ExcelReadOptions::default();
    let failure = super::parse_excel_rows(sheet_rows, &read_options)
        .expect_err("duplicate headers should error");

    assert!(matches!(failure, IoError::DuplicateColumnName(_)));
}
17460
17461 use super::{
17481 SqlBackendCaps, SqlColumnSchema, SqlForeignKeySchema, SqlIfExists, SqlIndexSchema,
17482 SqlInsertMethod, SqlInspector, SqlQueryResult, SqlReadOptions, SqlReflectedTable,
17483 SqlTableSchema, SqlUniqueConstraintSchema, SqlWriteOptions, list_sql_foreign_keys,
17484 list_sql_indexes, list_sql_schemas, list_sql_tables, list_sql_unique_constraints,
17485 list_sql_views, sql_backend_caps, sql_max_identifier_length, sql_max_insert_rows,
17486 sql_max_param_count, sql_primary_key_columns, sql_server_version, sql_supports_returning,
17487 sql_supports_schemas, sql_table_comment, sql_table_schema, truncate_sql_table, write_sql,
17488 write_sql_with_options,
17489 };
17490 #[cfg(feature = "sql-sqlite")]
17491 use super::{
17492 read_sql, read_sql_chunks, read_sql_chunks_with_index_col, read_sql_chunks_with_options,
17493 read_sql_chunks_with_options_and_index_col, read_sql_query, read_sql_query_chunks,
17494 read_sql_query_chunks_with_index_col, read_sql_query_chunks_with_options,
17495 read_sql_query_chunks_with_options_and_index_col, read_sql_query_with_index_col,
17496 read_sql_query_with_options, read_sql_query_with_options_and_index_col, read_sql_table,
17497 read_sql_table_chunks, read_sql_table_chunks_with_index_col,
17498 read_sql_table_chunks_with_options, read_sql_table_chunks_with_options_and_index_col,
17499 read_sql_table_columns, read_sql_table_columns_chunks,
17500 read_sql_table_columns_chunks_with_index_col, read_sql_table_columns_with_index_col,
17501 read_sql_table_with_index_col, read_sql_table_with_options,
17502 read_sql_table_with_options_and_index_col, read_sql_with_index_col, read_sql_with_options,
17503 };
17504
/// Opens a fresh in-memory SQLite connection for a single SQL test.
#[cfg(feature = "sql-sqlite")]
fn make_sql_test_conn() -> rusqlite::Connection {
    let opened = rusqlite::Connection::open_in_memory();
    opened.expect("in-memory sqlite")
}
17513
/// Reading a table with `index_col = Some("ints")` must move that column
/// into the index (name and labels) and leave only `floats` and `names` as
/// data columns.
#[cfg(feature = "sql-sqlite")]
#[test]
fn sql_read_with_index_col_promotes_named_column() {
    let source = make_test_dataframe();
    let conn = make_sql_test_conn();
    write_sql(&source, &conn, "indexed_tbl", SqlIfExists::Fail).expect("write");

    let indexed = read_sql_table_with_index_col(&conn, "indexed_tbl", Some("ints"))
        .expect("read with index");

    assert_eq!(indexed.index().name(), Some("ints"));
    let expected_labels = [
        crate::IndexLabel::Int64(10),
        crate::IndexLabel::Int64(20),
        crate::IndexLabel::Int64(30),
    ];
    for (row, want) in expected_labels.iter().enumerate() {
        assert_eq!(&indexed.index().labels()[row], want);
    }

    let remaining: Vec<&str> = indexed.column_names().iter().map(|s| s.as_str()).collect();
    assert!(!remaining.contains(&"ints"));
    assert!(remaining.contains(&"floats"));
    assert!(remaining.contains(&"names"));
}
17536
/// Passing `None` as the index column must give the same index labels and
/// column names as the plain `read_sql_table` call.
#[cfg(feature = "sql-sqlite")]
#[test]
fn sql_read_with_index_col_none_is_unchanged() {
    let source = make_test_dataframe();
    let conn = make_sql_test_conn();
    write_sql(&source, &conn, "noindex_tbl", SqlIfExists::Fail).expect("write");

    let plain = read_sql_table(&conn, "noindex_tbl").expect("baseline");
    let via_variant =
        read_sql_table_with_index_col(&conn, "noindex_tbl", None).expect("noop variant");

    assert_eq!(via_variant.index().labels(), plain.index().labels());
    assert_eq!(via_variant.column_names(), plain.column_names());
}
17549
/// Naming an index column that the table does not have must fail with
/// `IoError::Sql`.
#[cfg(feature = "sql-sqlite")]
#[test]
fn sql_read_with_index_col_unknown_column_errors() {
    let source = make_test_dataframe();
    let conn = make_sql_test_conn();
    write_sql(&source, &conn, "missing_tbl", SqlIfExists::Fail).expect("write");

    let failure = read_sql_table_with_index_col(&conn, "missing_tbl", Some("nope")).unwrap_err();

    assert!(matches!(failure, crate::IoError::Sql(_)));
}
17559
/// A two-column projection must return the columns in the order requested
/// (`names`, then `ints`), with all three rows and the original values.
#[cfg(feature = "sql-sqlite")]
#[test]
fn sql_read_table_columns_returns_requested_projection_in_order() {
    let source = make_test_dataframe();
    let conn = make_sql_test_conn();
    write_sql(&source, &conn, "proj_tbl", SqlIfExists::Fail).expect("write");

    let projected = read_sql_table_columns(&conn, "proj_tbl", &["names", "ints"])
        .expect("subset projection");

    let column_order: Vec<&str> = projected.column_names().iter().map(|s| s.as_str()).collect();
    assert_eq!(column_order, vec!["names", "ints"]);
    assert_eq!(projected.index().len(), 3);

    let ints = projected.column("ints").unwrap();
    assert_eq!(ints.values()[0], Scalar::Int64(10));

    let names = projected.column("names").unwrap();
    assert_eq!(names.values()[2], Scalar::Utf8("carol".into()));
}
17581
/// Projecting a single column must return only that column with its values
/// intact.
#[cfg(feature = "sql-sqlite")]
#[test]
fn sql_read_table_columns_single_column_projection() {
    let source = make_test_dataframe();
    let conn = make_sql_test_conn();
    write_sql(&source, &conn, "single_tbl", SqlIfExists::Fail).expect("write");

    let projected =
        read_sql_table_columns(&conn, "single_tbl", &["floats"]).expect("single projection");

    let column_order: Vec<&str> = projected.column_names().iter().map(|s| s.as_str()).collect();
    assert_eq!(column_order, vec!["floats"]);

    let floats = projected.column("floats").unwrap();
    assert_eq!(floats.values()[1], Scalar::Float64(2.5));
}
17598
/// An empty column list must be rejected with `IoError::Sql`.
#[cfg(feature = "sql-sqlite")]
#[test]
fn sql_read_table_columns_rejects_empty_columns() {
    let conn = make_sql_test_conn();

    let failure = read_sql_table_columns(&conn, "any_tbl", &[]).unwrap_err();

    assert!(matches!(failure, crate::IoError::Sql(_)));
}
17606
/// A column name carrying an injection-style payload is reported as an
/// `IoError::Sql`.
#[cfg(feature = "sql-sqlite")]
#[test]
fn sql_read_table_columns_rejects_invalid_column_name() {
    let conn = make_sql_test_conn();
    let source = make_test_dataframe();
    write_sql(&source, &conn, "valid_tbl", SqlIfExists::Fail).expect("write");

    // The table exists, so the failure is attributable to the column name.
    let outcome = read_sql_table_columns(&conn, "valid_tbl", &["ints; DROP TABLE valid_tbl"]);
    assert!(matches!(outcome.unwrap_err(), crate::IoError::Sql(_)));
}
17617
/// A table name containing a space is reported as an `IoError::Sql`.
#[cfg(feature = "sql-sqlite")]
#[test]
fn sql_read_table_columns_rejects_invalid_table_name() {
    let conn = make_sql_test_conn();

    let outcome = read_sql_table_columns(&conn, "bad table", &["ints"]);
    assert!(matches!(outcome.unwrap_err(), crate::IoError::Sql(_)));
}
17625
/// Chunked projection reads keep the requested column order and split the
/// rows according to `chunksize`.
#[cfg(feature = "sql-sqlite")]
#[test]
fn sql_read_table_columns_chunks_returns_requested_projection_in_order() {
    let conn = make_sql_test_conn();
    let source = make_test_dataframe();
    write_sql(&source, &conn, "proj_chunk_tbl", SqlIfExists::Fail).expect("write");

    let chunk_iter =
        read_sql_table_columns_chunks(&conn, "proj_chunk_tbl", &["names", "ints"], 2)
            .expect("projection chunk iterator");
    let chunks = chunk_iter
        .collect::<Result<Vec<_>, _>>()
        .expect("all chunks");

    // With a chunk size of 2 the rows arrive as a two-row chunk and a one-row chunk.
    assert_eq!(chunks.len(), 2);
    let (head, tail) = (&chunks[0], &chunks[1]);

    assert_eq!(head.column_names(), vec!["names", "ints"]);
    let head_names = head.column("names").unwrap();
    assert_eq!(
        head_names.values(),
        &[
            Scalar::Utf8("alice".to_owned()),
            Scalar::Utf8("bob".to_owned())
        ]
    );

    let tail_ints = tail.column("ints").unwrap();
    assert_eq!(tail_ints.values(), &[Scalar::Int64(30)]);
}
17652
/// A chunk size of zero is rejected with an SQL error that mentions
/// `chunksize`.
#[cfg(feature = "sql-sqlite")]
#[test]
fn sql_read_table_columns_chunks_rejects_zero_chunksize() {
    let conn = make_sql_test_conn();
    let source = make_test_dataframe();
    write_sql(&source, &conn, "proj_zero_chunk_tbl", SqlIfExists::Fail).expect("write");

    let outcome = read_sql_table_columns_chunks(&conn, "proj_zero_chunk_tbl", &["names"], 0);
    let rejection = outcome.expect_err("zero projection chunksize should be rejected");

    assert!(matches!(rejection, IoError::Sql(msg) if msg.contains("chunksize")));
}
17665
/// The chunked projection reader rejects an empty column list and a column
/// name containing a space, each with a specific error message.
#[cfg(feature = "sql-sqlite")]
#[test]
fn sql_read_table_columns_chunks_rejects_invalid_projection_inputs() {
    let conn = make_sql_test_conn();

    // Case 1: nothing to project.
    let empty_rejection = read_sql_table_columns_chunks(&conn, "proj_chunk_tbl", &[], 1)
        .expect_err("empty projection should be rejected");
    assert!(
        matches!(empty_rejection, IoError::Sql(msg) if msg.contains("columns must be non-empty"))
    );

    // Case 2: a column name that is not a valid identifier.
    let invalid_rejection =
        read_sql_table_columns_chunks(&conn, "proj_chunk_tbl", &["bad column"], 1)
            .expect_err("invalid projection name should be rejected");
    assert!(
        matches!(invalid_rejection, IoError::Sql(msg) if msg.contains("invalid column name"))
    );
}
17679
/// When `index_col` names one of the projected columns, that column becomes
/// the frame index and no longer appears among the data columns.
#[cfg(feature = "sql-sqlite")]
#[test]
fn sql_read_table_columns_with_index_col_promotes_projected_column() {
    let conn = make_sql_test_conn();
    let source = make_test_dataframe();
    write_sql(&source, &conn, "proj_index_tbl", SqlIfExists::Fail).expect("write");

    let indexed = read_sql_table_columns_with_index_col(
        &conn,
        "proj_index_tbl",
        &["names", "ints"],
        Some("ints"),
    )
    .expect("projection with index_col");

    // `ints` now lives in the index, name and labels included.
    let index = indexed.index();
    assert_eq!(index.name(), Some("ints"));
    assert_eq!(
        index.labels(),
        &[
            IndexLabel::Int64(10),
            IndexLabel::Int64(20),
            IndexLabel::Int64(30)
        ]
    );

    // Only `names` is left as a data column.
    assert_eq!(indexed.column_names(), vec!["names"]);
    assert!(indexed.column("ints").is_none());
    let names = indexed.column("names").unwrap();
    assert_eq!(
        names.values(),
        &[
            Scalar::Utf8("alice".to_owned()),
            Scalar::Utf8("bob".to_owned()),
            Scalar::Utf8("carol".to_owned())
        ]
    );
}
17715
/// An `index_col` that is not part of the requested projection is still
/// fetched and promoted to the index, without leaking into the data columns.
#[cfg(feature = "sql-sqlite")]
#[test]
fn sql_read_table_columns_with_index_col_auto_projects_when_absent() {
    let conn = make_sql_test_conn();
    let source = make_test_dataframe();
    write_sql(&source, &conn, "auto_proj_tbl", SqlIfExists::Fail).expect("write");

    // Only `names` is requested; `ints` is supplied solely as the index column.
    let indexed =
        read_sql_table_columns_with_index_col(&conn, "auto_proj_tbl", &["names"], Some("ints"))
            .expect("auto-project index_col");

    let index = indexed.index();
    assert_eq!(index.name(), Some("ints"));
    assert_eq!(
        index.labels(),
        &[
            IndexLabel::Int64(10),
            IndexLabel::Int64(20),
            IndexLabel::Int64(30)
        ]
    );

    assert_eq!(indexed.column_names(), vec!["names"]);
    assert!(indexed.column("ints").is_none());
    let names = indexed.column("names").unwrap();
    assert_eq!(
        names.values(),
        &[
            Scalar::Utf8("alice".to_owned()),
            Scalar::Utf8("bob".to_owned()),
            Scalar::Utf8("carol".to_owned())
        ]
    );
}
17753
/// Chunked variant of the auto-projection case: every chunk gets its index
/// from `ints` even though only `names` was requested.
#[cfg(feature = "sql-sqlite")]
#[test]
fn sql_read_table_columns_chunks_with_index_col_auto_projects_when_absent() {
    let conn = make_sql_test_conn();
    let source = make_test_dataframe();
    write_sql(&source, &conn, "auto_proj_chunks_tbl", SqlIfExists::Fail).expect("write");

    let chunk_iter = read_sql_table_columns_chunks_with_index_col(
        &conn,
        "auto_proj_chunks_tbl",
        &["names"],
        Some("ints"),
        2,
    )
    .expect("auto-project chunks");
    let chunks = chunk_iter
        .collect::<Result<Vec<_>, _>>()
        .expect("all chunks");

    assert_eq!(chunks.len(), 2);
    let (head, tail) = (&chunks[0], &chunks[1]);

    // First chunk: two rows, indexed by `ints`, carrying only `names`.
    let head_index = head.index();
    assert_eq!(head_index.name(), Some("ints"));
    assert_eq!(
        head_index.labels(),
        &[IndexLabel::Int64(10), IndexLabel::Int64(20)]
    );
    assert_eq!(head.column_names(), vec!["names"]);
    assert!(head.column("ints").is_none());
    let head_names = head.column("names").unwrap();
    assert_eq!(
        head_names.values(),
        &[
            Scalar::Utf8("alice".to_owned()),
            Scalar::Utf8("bob".to_owned())
        ]
    );

    // Second chunk: the remaining row, same column layout.
    assert_eq!(tail.index().labels(), &[IndexLabel::Int64(30)]);
    assert_eq!(tail.column_names(), vec!["names"]);
    assert!(tail.column("ints").is_none());
}
17793
/// Listing the index column explicitly in the projection does not make it
/// appear twice: it is promoted to the index and dropped from the columns.
#[cfg(feature = "sql-sqlite")]
#[test]
fn sql_read_table_columns_with_index_col_no_duplication_when_listed() {
    let conn = make_sql_test_conn();
    let source = make_test_dataframe();
    write_sql(&source, &conn, "no_dup_tbl", SqlIfExists::Fail).expect("write");

    let indexed = read_sql_table_columns_with_index_col(
        &conn,
        "no_dup_tbl",
        &["names", "ints"],
        Some("ints"),
    )
    .expect("explicit include + index_col");

    assert_eq!(indexed.index().name(), Some("ints"));
    assert_eq!(indexed.column_names(), vec!["names"]);
    assert!(indexed.column("ints").is_none());
}
17818
/// With `index_col = None` the projection is returned untouched and the index
/// labels are the positions 0, 1, 2.
#[cfg(feature = "sql-sqlite")]
#[test]
fn sql_read_table_columns_with_index_col_none_keeps_projection_and_range_index() {
    let conn = make_sql_test_conn();
    let source = make_test_dataframe();
    write_sql(&source, &conn, "proj_no_index_tbl", SqlIfExists::Fail).expect("write");

    let plain = read_sql_table_columns_with_index_col(
        &conn,
        "proj_no_index_tbl",
        &["floats", "names"],
        None,
    )
    .expect("projection without index_col");

    assert_eq!(
        plain.index().labels(),
        &[
            IndexLabel::Int64(0),
            IndexLabel::Int64(1),
            IndexLabel::Int64(2)
        ]
    );
    assert_eq!(plain.column_names(), vec!["floats", "names"]);

    let floats = plain.column("floats").unwrap();
    assert_eq!(floats.values()[1], Scalar::Float64(2.5));
}
17848
/// Each chunk produced by the indexed, chunked projection reader has the
/// index column promoted and removed from its data columns.
#[cfg(feature = "sql-sqlite")]
#[test]
fn sql_read_table_columns_chunks_with_index_col_promotes_each_chunk_index() {
    let conn = make_sql_test_conn();
    let source = make_test_dataframe();
    write_sql(&source, &conn, "proj_index_chunk_tbl", SqlIfExists::Fail).expect("write");

    let chunk_iter = read_sql_table_columns_chunks_with_index_col(
        &conn,
        "proj_index_chunk_tbl",
        &["ints", "names"],
        Some("ints"),
        2,
    )
    .expect("indexed projection chunk iterator");
    let chunks = chunk_iter
        .collect::<Result<Vec<_>, _>>()
        .expect("all chunks");

    assert_eq!(chunks.len(), 2);
    let (head, tail) = (&chunks[0], &chunks[1]);

    // First chunk covers the first two rows.
    let head_index = head.index();
    assert_eq!(head_index.name(), Some("ints"));
    assert_eq!(
        head_index.labels(),
        &[IndexLabel::Int64(10), IndexLabel::Int64(20)]
    );
    assert_eq!(head.column_names(), vec!["names"]);
    let head_names = head.column("names").unwrap();
    assert_eq!(
        head_names.values(),
        &[
            Scalar::Utf8("alice".to_owned()),
            Scalar::Utf8("bob".to_owned())
        ]
    );

    // Second chunk carries the last row.
    assert_eq!(tail.index().labels(), &[IndexLabel::Int64(30)]);
    let tail_names = tail.column("names").unwrap();
    assert_eq!(tail_names.values(), &[Scalar::Utf8("carol".to_owned())]);
}
17887
/// Invalid projections are rejected by the indexed projection readers with
/// the same messages as the non-indexed ones.
#[cfg(feature = "sql-sqlite")]
#[test]
fn sql_read_table_columns_chunks_with_index_col_validates_projection_and_index() {
    let conn = make_sql_test_conn();
    let source = make_test_dataframe();
    write_sql(&source, &conn, "proj_index_error_tbl", SqlIfExists::Fail).expect("write");

    // Chunked reader, empty projection.
    let empty_rejection = read_sql_table_columns_chunks_with_index_col(
        &conn,
        "proj_index_error_tbl",
        &[],
        Some("ints"),
        1,
    )
    .expect_err("empty projection should be rejected");
    assert!(
        matches!(empty_rejection, IoError::Sql(msg) if msg.contains("columns must be non-empty"))
    );

    // NOTE(review): this case goes through the non-chunked reader even though the
    // test name refers to the chunked one; confirm whether that is intentional.
    let invalid_rejection = read_sql_table_columns_with_index_col(
        &conn,
        "proj_index_error_tbl",
        &["bad column"],
        Some("ints"),
    )
    .expect_err("invalid projection name should be rejected");
    assert!(
        matches!(invalid_rejection, IoError::Sql(msg) if msg.contains("invalid column name"))
    );
}
17914
/// `index_col` can refer to an aliased column of an arbitrary SELECT, and the
/// index follows the query's row order.
#[cfg(feature = "sql-sqlite")]
#[test]
fn sql_read_query_with_index_col_works_on_arbitrary_select() {
    let conn = make_sql_test_conn();
    let source = make_test_dataframe();
    write_sql(&source, &conn, "queried_tbl", SqlIfExists::Fail).expect("write");

    let query = "SELECT names AS label, ints, floats FROM queried_tbl ORDER BY ints DESC";
    let indexed =
        read_sql_with_index_col(&conn, query, Some("label")).expect("read query with index");

    let index = indexed.index();
    assert_eq!(index.name(), Some("label"));
    // Descending `ints` order puts "carol" first and "alice" last.
    assert_eq!(index.labels()[0], crate::IndexLabel::Utf8("carol".into()));
    assert_eq!(index.labels()[2], crate::IndexLabel::Utf8("alice".into()));
}
17939
/// Writing the test frame and reading the table back preserves every value
/// in the `ints`, `floats`, and `names` columns.
#[cfg(feature = "sql-sqlite")]
#[test]
fn sql_write_read_roundtrip() {
    let conn = make_sql_test_conn();
    let source = make_test_dataframe();

    write_sql(&source, &conn, "test_table", SqlIfExists::Fail).expect("write sql");

    let reloaded = read_sql_table(&conn, "test_table").expect("read sql");
    assert_eq!(reloaded.index().len(), 3);

    // Compare each column cell by cell against the expected values.
    let ints = reloaded.column("ints").unwrap();
    for (row, expected) in [10_i64, 20, 30].into_iter().enumerate() {
        assert_eq!(ints.values()[row], Scalar::Int64(expected));
    }

    let floats = reloaded.column("floats").unwrap();
    for (row, expected) in [1.5_f64, 2.5, 3.5].into_iter().enumerate() {
        assert_eq!(floats.values()[row], Scalar::Float64(expected));
    }

    let names = reloaded.column("names").unwrap();
    for (row, expected) in ["alice", "bob", "carol"].into_iter().enumerate() {
        assert_eq!(names.values()[row], Scalar::Utf8(expected.into()));
    }
}
17969
17970 #[derive(Default)]
17971 struct DollarMarkerSqlConn {
17972 insert_sql: std::cell::RefCell<Vec<String>>,
17973 inserted_rows: std::cell::RefCell<Vec<Vec<Vec<Scalar>>>>,
17974 }
17975
17976 impl super::SqlConnection for DollarMarkerSqlConn {
17977 fn query(
17978 &self,
17979 _query: &str,
17980 _params: &[Scalar],
17981 ) -> Result<super::SqlQueryResult, IoError> {
17982 Err(IoError::Sql("mock connection does not read".to_owned()))
17983 }
17984
17985 fn execute_batch(&self, _sql: &str) -> Result<(), IoError> {
17986 Ok(())
17987 }
17988
17989 fn table_exists(&self, _table_name: &str) -> Result<bool, IoError> {
17990 Ok(false)
17991 }
17992
17993 fn insert_rows(&self, insert_sql: &str, rows: &[Vec<Scalar>]) -> Result<(), IoError> {
17994 self.insert_sql.borrow_mut().push(insert_sql.to_owned());
17995 self.inserted_rows.borrow_mut().push(rows.to_vec());
17996 Ok(())
17997 }
17998
17999 fn dtype_sql(&self, dtype: DType) -> &'static str {
18000 match dtype {
18001 DType::Int64
18002 | DType::Int64Nullable
18003 | DType::Bool
18004 | DType::BoolNullable
18005 | DType::Timedelta64
18006 | DType::Datetime64 => "BIGINT",
18007 DType::Float64 => "DOUBLE PRECISION",
18008 DType::Utf8
18009 | DType::Categorical
18010 | DType::Null
18011 | DType::Sparse
18012 | DType::Period
18013 | DType::Interval => "TEXT",
18014 }
18015 }
18016
18017 fn index_dtype_sql(&self, _index: &Index) -> &'static str {
18018 "TEXT"
18019 }
18020
18021 fn parameter_marker(&self, ordinal: usize) -> String {
18022 format!("${ordinal}")
18023 }
18024 }
18025
/// SELECT builders double-quote table and column identifiers and still reject
/// a column name containing a space.
#[test]
fn sql_query_builders_quote_select_and_projection_identifiers() {
    let conn = DollarMarkerSqlConn::default();

    let select_all =
        super::sql_select_all_query(&conn, "portable_tbl").expect("select all query");
    assert_eq!(select_all, "SELECT * FROM \"portable_tbl\"");

    let projection = super::sql_select_columns_query(&conn, "portable_tbl", &["names", "ints"])
        .expect("projection query");
    assert_eq!(projection, "SELECT \"names\", \"ints\" FROM \"portable_tbl\"");

    let rejection = super::sql_select_columns_query(&conn, "portable_tbl", &["bad column"])
        .expect_err("projection identifiers stay validated");
    assert!(matches!(rejection, IoError::Sql(msg) if msg.contains("invalid column name")));
}
18043
/// CREATE TABLE and INSERT builders quote identifiers (doubling embedded
/// quotes) and take their parameter markers from the backend.
#[test]
fn sql_query_builders_create_and_insert_use_backend_contracts() {
    let conn = DollarMarkerSqlConn::default();

    let index_def =
        super::sql_column_definition(&conn, "row id", "TEXT").expect("index column definition");
    let value_def = super::sql_column_definition(&conn, "value\"raw", "BIGINT")
        .expect("value column definition");
    let column_defs = vec![index_def, value_def];

    let create_sql = super::sql_create_table_query(&conn, "typed_tbl", &column_defs)
        .expect("create table query");
    assert_eq!(
        create_sql,
        "CREATE TABLE IF NOT EXISTS \"typed_tbl\" (\"row id\" TEXT, \"value\"\"raw\" BIGINT)"
    );

    let insert_columns = vec!["row id".to_owned(), "value\"raw".to_owned()];
    let insert_sql = super::sql_insert_rows_query(&conn, "typed_tbl", &insert_columns)
        .expect("insert row query");
    assert_eq!(
        insert_sql,
        "INSERT INTO \"typed_tbl\" (\"row id\", \"value\"\"raw\") VALUES ($1, $2)"
    );
}
18066
/// Every query builder routes identifiers through the backend's
/// `quote_identifier`, shown here with a backtick-quoting mock.
#[test]
fn sql_query_builders_use_backend_quote_identifier_override() {
    /// Mock backend whose only non-trivial behaviour is backtick quoting.
    #[derive(Default)]
    struct BacktickSqlConn;

    impl super::SqlConnection for BacktickSqlConn {
        fn query(&self, _q: &str, _p: &[Scalar]) -> Result<super::SqlQueryResult, IoError> {
            let empty = super::SqlQueryResult {
                columns: vec![],
                rows: vec![],
            };
            Ok(empty)
        }

        fn execute_batch(&self, _sql: &str) -> Result<(), IoError> {
            Ok(())
        }

        fn table_exists(&self, _name: &str) -> Result<bool, IoError> {
            Ok(false)
        }

        fn insert_rows(&self, _sql: &str, _rows: &[Vec<Scalar>]) -> Result<(), IoError> {
            Ok(())
        }

        fn dtype_sql(&self, _dtype: DType) -> &'static str {
            "TEXT"
        }

        fn index_dtype_sql(&self, _index: &Index) -> &'static str {
            "TEXT"
        }

        /// Wraps `ident` in backticks, doubling embedded backticks; NUL bytes are an error.
        fn quote_identifier(&self, ident: &str) -> Result<String, IoError> {
            if ident.contains('\0') {
                Err(IoError::Sql("invalid SQL identifier: NUL byte".to_owned()))
            } else {
                let escaped = ident.replace('`', "``");
                Ok(format!("`{}`", escaped))
            }
        }
    }

    let conn = BacktickSqlConn;

    let select_all = super::sql_select_all_query(&conn, "users").expect("select all");
    assert_eq!(select_all, "SELECT * FROM `users`");

    let projection =
        super::sql_select_columns_query(&conn, "users", &["id", "name"]).expect("projection");
    assert_eq!(projection, "SELECT `id`, `name` FROM `users`");

    let id_def = super::sql_column_definition(&conn, "id", "INTEGER").expect("col def");
    let col_defs = vec![id_def];
    let create_sql = super::sql_create_table_query(&conn, "users", &col_defs).expect("create");
    assert_eq!(create_sql, "CREATE TABLE IF NOT EXISTS `users` (`id` INTEGER)");

    // This mock does not override `parameter_marker`; the expected markers are `?`.
    let insert_cols = vec!["id".to_owned(), "name".to_owned()];
    let insert_sql = super::sql_insert_rows_query(&conn, "users", &insert_cols).expect("insert");
    assert_eq!(insert_sql, "INSERT INTO `users` (`id`, `name`) VALUES (?, ?)");
}
18129
/// `write_sql` builds its INSERT statement with the backend's `$N` markers
/// and hands all rows to `insert_rows` in a single call.
#[test]
fn sql_write_uses_backend_parameter_markers() {
    let conn = DollarMarkerSqlConn::default();
    let source = make_test_dataframe();

    write_sql(&source, &conn, "portable_tbl", SqlIfExists::Fail)
        .expect("write through marker-aware mock backend");

    // Exactly one INSERT statement was recorded.
    let recorded_sql = conn.insert_sql.borrow();
    assert_eq!(
        recorded_sql.as_slice(),
        &["INSERT INTO \"portable_tbl\" (\"ints\", \"floats\", \"names\") VALUES ($1, $2, $3)"
            .to_owned()]
    );

    // The first recorded batch holds one row per index entry.
    let recorded_rows = conn.inserted_rows.borrow();
    let first_batch = &recorded_rows[0];
    assert_eq!(first_batch.len(), source.index().len());
    assert_eq!(first_batch[0][0], Scalar::Int64(10));
    assert_eq!(first_batch[2][2], Scalar::Utf8("carol".into()));
}
18149
/// With `index: true` and no label override, a named index is written as a
/// column under its own name and can be promoted back on read.
#[cfg(feature = "sql-sqlite")]
#[test]
fn sql_write_with_options_includes_named_index_column() {
    let vals = Column::new(DType::Int64, vec![Scalar::Int64(10), Scalar::Int64(20)]).unwrap();
    let columns = BTreeMap::from([("vals".to_string(), vals)]);
    let row_ids =
        Index::new(vec![IndexLabel::Int64(101), IndexLabel::Int64(102)]).set_name("row_id");
    let frame =
        DataFrame::new_with_column_order(row_ids, columns, vec!["vals".to_string()]).unwrap();
    let conn = make_sql_test_conn();

    let options = SqlWriteOptions {
        if_exists: SqlIfExists::Fail,
        index: true,
        index_label: None,
        schema: None,
        dtype: None,
        method: SqlInsertMethod::Single,
        chunksize: None,
    };
    write_sql_with_options(&frame, &conn, "indexed_write_tbl", &options)
        .expect("write with named index");

    let roundtrip = read_sql_table_with_index_col(&conn, "indexed_write_tbl", Some("row_id"))
        .expect("read with promoted index");

    // The index survives the round trip and is not duplicated as a data column.
    assert_eq!(roundtrip.index().name(), Some("row_id"));
    assert_eq!(roundtrip.index().labels(), frame.index().labels());
    assert!(roundtrip.column("row_id").is_none());

    let vals = roundtrip.column("vals").unwrap();
    assert_eq!(vals.values(), &[Scalar::Int64(10), Scalar::Int64(20)]);
}
18193
/// Writing the test frame with `index: true` and no label stores the index in
/// a column called `index`.
#[cfg(feature = "sql-sqlite")]
#[test]
fn sql_write_with_options_unnamed_index_defaults_to_index_column_name() {
    let conn = make_sql_test_conn();
    let source = make_test_dataframe();

    let options = SqlWriteOptions {
        if_exists: SqlIfExists::Fail,
        index: true,
        index_label: None,
        schema: None,
        dtype: None,
        method: SqlInsertMethod::Single,
        chunksize: None,
    };
    write_sql_with_options(&source, &conn, "default_index_tbl", &options)
        .expect("write with unnamed index");

    let raw = read_sql_table(&conn, "default_index_tbl").expect("read raw table");

    // The `index` column exists and holds the positions 0 and 2 at its ends.
    let index_column = raw.column("index");
    assert!(index_column.is_some());
    let index_column = index_column.unwrap();
    assert_eq!(index_column.values()[0], Scalar::Int64(0));
    assert_eq!(index_column.values()[2], Scalar::Int64(2));
}
18221
/// An explicit `index_label` takes precedence over the index's own name when
/// the index is written as a column.
#[cfg(feature = "sql-sqlite")]
#[test]
fn sql_write_with_options_index_label_overrides_name() {
    let vals = Column::new(DType::Int64, vec![Scalar::Int64(7), Scalar::Int64(8)]).unwrap();
    let columns = BTreeMap::from([("vals".to_string(), vals)]);
    let row_ids = Index::new(vec![IndexLabel::Int64(1), IndexLabel::Int64(2)]).set_name("row_id");
    let frame =
        DataFrame::new_with_column_order(row_ids, columns, vec!["vals".to_string()]).unwrap();
    let conn = make_sql_test_conn();

    let options = SqlWriteOptions {
        if_exists: SqlIfExists::Fail,
        index: true,
        index_label: Some("custom_id".to_string()),
        schema: None,
        dtype: None,
        method: SqlInsertMethod::Single,
        chunksize: None,
    };
    write_sql_with_options(&frame, &conn, "override_index_tbl", &options)
        .expect("write with custom index label");

    let raw = read_sql_table(&conn, "override_index_tbl").expect("read raw table");

    // The index landed under the label, not under its original name.
    let custom_id = raw.column("custom_id");
    assert!(custom_id.is_some());
    assert!(raw.column("row_id").is_none());

    let custom_id = custom_id.unwrap();
    assert_eq!(custom_id.values()[0], Scalar::Int64(1));
    assert_eq!(custom_id.values()[1], Scalar::Int64(2));
}
18267
/// With `index: false` no index column is written, even when an
/// `index_label` is supplied.
#[cfg(feature = "sql-sqlite")]
#[test]
fn sql_write_with_options_index_false_omits_index_column() {
    let vals = Column::new(DType::Int64, vec![Scalar::Int64(5), Scalar::Int64(6)]).unwrap();
    let columns = BTreeMap::from([("vals".to_string(), vals)]);
    let row_ids = Index::new(vec![IndexLabel::Int64(9), IndexLabel::Int64(10)]).set_name("row_id");
    let frame =
        DataFrame::new_with_column_order(row_ids, columns, vec!["vals".to_string()]).unwrap();
    let conn = make_sql_test_conn();

    let options = SqlWriteOptions {
        if_exists: SqlIfExists::Fail,
        index: false,
        index_label: Some("custom_id".to_string()),
        schema: None,
        dtype: None,
        method: SqlInsertMethod::Single,
        chunksize: None,
    };
    write_sql_with_options(&frame, &conn, "no_index_write_tbl", &options)
        .expect("write without index");

    let raw = read_sql_table(&conn, "no_index_write_tbl").expect("read raw table");

    // Neither the index name nor the label shows up; only the data column remains.
    assert!(raw.column("row_id").is_none());
    assert!(raw.column("custom_id").is_none());
    let observed: Vec<&str> = raw.column_names().iter().map(|name| name.as_str()).collect();
    assert_eq!(observed, vec!["vals"]);
}
18311
/// `read_sql` runs an arbitrary SELECT with a WHERE clause and returns only
/// the matching rows.
#[cfg(feature = "sql-sqlite")]
#[test]
fn sql_read_with_query() {
    let conn = make_sql_test_conn();
    let source = make_test_dataframe();
    write_sql(&source, &conn, "data", SqlIfExists::Fail).unwrap();

    let filtered = read_sql(&conn, "SELECT ints, names FROM data WHERE ints > 15").unwrap();

    // Two rows satisfy `ints > 15`.
    assert_eq!(filtered.index().len(), 2);

    let ints = filtered.column("ints").unwrap();
    assert_eq!(ints.values()[0], Scalar::Int64(20));
    let names = filtered.column("names").unwrap();
    assert_eq!(names.values()[1], Scalar::Utf8("carol".into()));
}
18330
/// `read_sql_query` honours the query's column order, filter, and ordering.
#[cfg(feature = "sql-sqlite")]
#[test]
fn sql_read_query_alias_matches_read_sql_query_path() {
    let conn = make_sql_test_conn();
    let source = make_test_dataframe();
    write_sql(&source, &conn, "data", SqlIfExists::Fail).unwrap();

    let sql = "SELECT names, ints FROM data WHERE ints >= 20 ORDER BY ints";
    let queried = read_sql_query(&conn, sql).unwrap();

    assert_eq!(queried.column_names(), vec!["names", "ints"]);
    assert_eq!(queried.index().len(), 2);

    let names = queried.column("names").unwrap();
    assert_eq!(
        names.values(),
        &[
            Scalar::Utf8("bob".to_owned()),
            Scalar::Utf8("carol".to_owned())
        ]
    );

    let ints = queried.column("ints").unwrap();
    assert_eq!(ints.values(), &[Scalar::Int64(20), Scalar::Int64(30)]);
}
18358
/// `read_sql_query_with_options` binds positional params and normalises the
/// `parse_dates` column to `YYYY-MM-DD HH:MM:SS` text.
#[cfg(feature = "sql-sqlite")]
#[test]
fn sql_read_query_with_options_applies_params_and_parse_dates() {
    let conn = make_sql_test_conn();
    conn.execute_batch(
        "CREATE TABLE events (ts TEXT, value INTEGER);
         INSERT INTO events (ts, value) VALUES
            ('2024-01-15', 1),
            ('2024-02-01 05:06:07', 2),
            ('2024-03-03', 3);",
    )
    .expect("create events table");

    let options = SqlReadOptions {
        params: Some(vec![Scalar::Int64(1)]),
        parse_dates: Some(vec!["ts".to_owned()]),
        coerce_float: false,
        dtype: None,
        schema: None,
        columns: None,
        index_col: None,
    };
    let frame = read_sql_query_with_options(
        &conn,
        "SELECT ts, value FROM events WHERE value > ? ORDER BY value",
        &options,
    )
    .expect("read_sql_query with options");

    assert_eq!(frame.column_names(), vec!["ts", "value"]);

    // The bound param (1) filters out the first row; date-only values gain a midnight time.
    let ts = frame.column("ts").unwrap();
    assert_eq!(
        ts.values(),
        &[
            Scalar::Utf8("2024-02-01 05:06:07".to_owned()),
            Scalar::Utf8("2024-03-03 00:00:00".to_owned())
        ]
    );

    let value = frame.column("value").unwrap();
    assert_eq!(value.values(), &[Scalar::Int64(2), Scalar::Int64(3)]);
}
18400
/// The options-plus-index reader works against any `SqlConnection`: it
/// forwards the query and params unchanged, applies `parse_dates` and
/// `coerce_float`, and promotes the explicitly passed index column.
#[test]
fn sql_read_query_with_options_and_index_col_uses_generic_connection() {
    use std::cell::RefCell;

    /// Mock backend that records the query and params it receives and
    /// answers with a fixed two-row result.
    struct RecordingSqlConn {
        seen_query: RefCell<Option<String>>,
        seen_params: RefCell<Vec<Scalar>>,
    }

    impl super::SqlConnection for RecordingSqlConn {
        fn query(&self, query: &str, params: &[Scalar]) -> Result<SqlQueryResult, IoError> {
            self.seen_query.replace(Some(query.to_owned()));
            self.seen_params.replace(params.to_vec());

            let text = |value: &str| Scalar::Utf8(value.to_owned());
            let columns = ["row_id", "ts", "amount", "label"]
                .into_iter()
                .map(str::to_owned)
                .collect();
            let rows = vec![
                vec![
                    Scalar::Int64(101),
                    text("2024-01-15"),
                    text("$1.25"),
                    text("alpha"),
                ],
                vec![
                    Scalar::Int64(102),
                    text("2024-01-16"),
                    text("2.50"),
                    text("beta"),
                ],
            ];
            Ok(SqlQueryResult { columns, rows })
        }

        fn execute_batch(&self, _sql: &str) -> Result<(), IoError> {
            Ok(())
        }

        fn table_exists(&self, _table_name: &str) -> Result<bool, IoError> {
            Ok(false)
        }

        fn insert_rows(&self, _insert_sql: &str, _rows: &[Vec<Scalar>]) -> Result<(), IoError> {
            Ok(())
        }

        fn dtype_sql(&self, _dtype: DType) -> &'static str {
            "TEXT"
        }

        fn index_dtype_sql(&self, _index: &Index) -> &'static str {
            "BIGINT"
        }
    }

    let conn = RecordingSqlConn {
        seen_query: RefCell::new(None),
        seen_params: RefCell::new(Vec::new()),
    };
    let query = "SELECT row_id, ts, amount, label FROM events WHERE amount > ?";

    // `options.index_col` names `amount`, but the explicit argument asks for `row_id`;
    // the assertions below expect `row_id` to be the one promoted.
    let options = SqlReadOptions {
        params: Some(vec![Scalar::Float64(1.0)]),
        parse_dates: Some(vec!["ts".to_owned()]),
        coerce_float: true,
        dtype: None,
        schema: None,
        columns: None,
        index_col: Some("amount".to_owned()),
    };
    let frame =
        super::read_sql_query_with_options_and_index_col(&conn, query, &options, Some("row_id"))
            .expect("generic read_sql query with options and index_col");

    // The backend saw the caller's query and params verbatim.
    assert_eq!(conn.seen_query.borrow().as_deref(), Some(query));
    assert_eq!(
        conn.seen_params.borrow().as_slice(),
        &[Scalar::Float64(1.0)]
    );

    let index = frame.index();
    assert_eq!(index.name(), Some("row_id"));
    assert_eq!(
        index.labels(),
        &[IndexLabel::Int64(101), IndexLabel::Int64(102)]
    );
    assert_eq!(frame.column_names(), vec!["ts", "amount", "label"]);

    let ts = frame.column("ts").unwrap();
    assert_eq!(
        ts.values(),
        &[
            Scalar::Utf8("2024-01-15 00:00:00".to_owned()),
            Scalar::Utf8("2024-01-16 00:00:00".to_owned())
        ]
    );

    // Text amounts, including one with a currency sign, come back as floats.
    let amount = frame.column("amount").unwrap();
    assert_eq!(
        amount.values(),
        &[Scalar::Float64(1.25), Scalar::Float64(2.5)]
    );

    let label = frame.column("label").unwrap();
    assert_eq!(
        label.values(),
        &[
            Scalar::Utf8("alpha".to_owned()),
            Scalar::Utf8("beta".to_owned())
        ]
    );
}
18510
/// `read_sql_query_with_index_col` promotes the named result column to the
/// index and removes it from the data columns.
#[cfg(feature = "sql-sqlite")]
#[test]
fn sql_read_query_with_index_col_promotes_named_column() {
    let conn = make_sql_test_conn();
    let source = make_test_dataframe();
    write_sql(&source, &conn, "data", SqlIfExists::Fail).unwrap();

    let sql = "SELECT names, ints FROM data ORDER BY ints";
    let indexed = read_sql_query_with_index_col(&conn, sql, Some("names")).unwrap();

    let index = indexed.index();
    assert_eq!(
        index.labels(),
        &[
            IndexLabel::Utf8("alice".to_owned()),
            IndexLabel::Utf8("bob".to_owned()),
            IndexLabel::Utf8("carol".to_owned())
        ]
    );
    assert_eq!(index.name(), Some("names"));
    assert!(indexed.column("names").is_none());

    let ints = indexed.column("ints").unwrap();
    assert_eq!(
        ints.values(),
        &[Scalar::Int64(10), Scalar::Int64(20), Scalar::Int64(30)]
    );
}
18540
/// `read_sql_query_chunks` splits a three-row result into chunks of at most
/// two rows while keeping the column order.
#[cfg(feature = "sql-sqlite")]
#[test]
fn sql_read_query_chunks_alias_batches_rows() {
    let conn = make_sql_test_conn();
    conn.execute_batch(
        "CREATE TABLE query_chunked (id INTEGER, name TEXT);
         INSERT INTO query_chunked (id, name) VALUES
            (1, 'alpha'),
            (2, 'beta'),
            (3, 'gamma');",
    )
    .expect("create query_chunked table");

    let chunk_iter =
        read_sql_query_chunks(&conn, "SELECT id, name FROM query_chunked ORDER BY id", 2)
            .expect("query chunk iterator");
    let chunks = chunk_iter
        .collect::<Result<Vec<_>, _>>()
        .expect("all chunks");

    assert_eq!(chunks.len(), 2);
    let (head, tail) = (&chunks[0], &chunks[1]);

    assert_eq!(head.column_names(), vec!["id", "name"]);
    let head_ids = head.column("id").unwrap();
    assert_eq!(head_ids.values(), &[Scalar::Int64(1), Scalar::Int64(2)]);

    let tail_names = tail.column("name").unwrap();
    assert_eq!(tail_names.values(), &[Scalar::Utf8("gamma".to_owned())]);
}
18571
/// The chunked query reader applies bound params, `parse_dates`, and
/// `coerce_float` to every chunk it yields.
#[cfg(feature = "sql-sqlite")]
#[test]
fn sql_read_query_chunks_with_options_applies_params_parse_dates_and_coerce_float() {
    let conn = make_sql_test_conn();
    conn.execute_batch(
        "CREATE TABLE query_events (ts TEXT, amount TEXT, keep INTEGER);
         INSERT INTO query_events (ts, amount, keep) VALUES
            ('2024-01-15', '12.50', 0),
            ('2024-02-01 05:06:07', '$1,234.50', 1),
            ('2024-03-03', '-3.25', 1);",
    )
    .expect("create query_events table");

    let options = SqlReadOptions {
        params: Some(vec![Scalar::Int64(1)]),
        parse_dates: Some(vec!["ts".to_owned()]),
        coerce_float: true,
        dtype: None,
        schema: None,
        columns: None,
        index_col: None,
    };
    let chunk_iter = read_sql_query_chunks_with_options(
        &conn,
        "SELECT ts, amount FROM query_events WHERE keep = ? ORDER BY ts",
        &options,
        1,
    )
    .expect("query chunk iterator");
    let chunks = chunk_iter
        .collect::<Result<Vec<_>, _>>()
        .expect("all chunks");

    // Only the two `keep = 1` rows survive, one per chunk.
    assert_eq!(chunks.len(), 2);
    let (head, tail) = (&chunks[0], &chunks[1]);

    let head_ts = head.column("ts").unwrap();
    assert_eq!(
        head_ts.values(),
        &[Scalar::Utf8("2024-02-01 05:06:07".to_owned())]
    );
    // The currency-formatted text amount is coerced to a float.
    let head_amount = head.column("amount").unwrap();
    assert_eq!(head_amount.values(), &[Scalar::Float64(1234.5)]);

    let tail_ts = tail.column("ts").unwrap();
    assert_eq!(
        tail_ts.values(),
        &[Scalar::Utf8("2024-03-03 00:00:00".to_owned())]
    );
    let tail_amount = tail.column("amount").unwrap();
    assert_eq!(tail_amount.values(), &[Scalar::Float64(-3.25)]);
}
18621
/// A backend whose `supports_paged_sql_chunks()` returns `true` must be driven
/// through wrapped `LIMIT ? OFFSET ?` queries, with the caller's own parameter
/// placed ahead of the two paging parameters.
///
/// The mock connection records every `(query, params)` pair so the exact
/// sequence of paging parameters can be checked at the end.
#[test]
fn sql_read_chunks_uses_paged_queries_when_backend_opts_in() {
    use std::cell::RefCell;

    /// In-memory mock backend: serves slices of `rows` and logs each query.
    struct PagedChunksConn {
        queries: RefCell<Vec<(String, Vec<Scalar>)>>,
        rows: Vec<Vec<Scalar>>,
    }

    impl PagedChunksConn {
        /// Pulls `(limit, offset)` out of `[Int64(1), Int64(limit), Int64(offset)]`.
        ///
        /// Any other shape fails the test: the fallback arm asserts against a
        /// slice that would itself have matched the first arm, so the
        /// assertion there always fails and prints the offending params.
        fn page_bounds(params: &[Scalar]) -> (usize, usize) {
            match params {
                [
                    Scalar::Int64(1),
                    Scalar::Int64(limit),
                    Scalar::Int64(offset),
                ] => (
                    usize::try_from(*limit).expect("non-negative limit"),
                    usize::try_from(*offset).expect("non-negative offset"),
                ),
                _ => {
                    assert_eq!(
                        params,
                        &[Scalar::Int64(1), Scalar::Int64(0), Scalar::Int64(0)],
                        "expected original param plus LIMIT/OFFSET params"
                    );
                    (0, 0)
                }
            }
        }
    }

    impl super::SqlConnection for PagedChunksConn {
        fn query(&self, query: &str, params: &[Scalar]) -> Result<SqlQueryResult, IoError> {
            // Log the call before validating it.
            self.queries
                .borrow_mut()
                .push((query.to_owned(), params.to_vec()));

            let is_paged_wrapper = query.contains("frankenpandas_sql_chunk_source")
                && query.contains("LIMIT ? OFFSET ?");
            assert!(
                is_paged_wrapper,
                "paged chunk path should wrap the caller query with LIMIT/OFFSET, got {query}"
            );

            let (limit, offset) = Self::page_bounds(params);
            Ok(SqlQueryResult {
                columns: vec!["id".to_owned(), "name".to_owned()],
                rows: self.rows.iter().skip(offset).take(limit).cloned().collect(),
            })
        }

        fn supports_paged_sql_chunks(&self) -> bool {
            true
        }

        fn execute_batch(&self, _sql: &str) -> Result<(), IoError> {
            Ok(())
        }

        fn table_exists(&self, _table_name: &str) -> Result<bool, IoError> {
            Ok(false)
        }

        fn insert_rows(&self, _insert_sql: &str, _rows: &[Vec<Scalar>]) -> Result<(), IoError> {
            Ok(())
        }

        fn dtype_sql(&self, _dtype: DType) -> &'static str {
            "TEXT"
        }

        fn index_dtype_sql(&self, _index: &Index) -> &'static str {
            "TEXT"
        }
    }

    // Five rows with a chunk size of two: pages of 2, 2 and 1 rows.
    let rows: Vec<Vec<Scalar>> = [(1, "a"), (2, "b"), (3, "c"), (4, "d"), (5, "e")]
        .iter()
        .map(|&(id, name)| vec![Scalar::Int64(id), Scalar::Utf8(name.to_owned())])
        .collect();
    let conn = PagedChunksConn {
        queries: RefCell::new(Vec::new()),
        rows,
    };

    let options = SqlReadOptions {
        params: Some(vec![Scalar::Int64(1)]),
        ..SqlReadOptions::default()
    };
    let chunks = read_sql_chunks_with_options(
        &conn,
        "SELECT id, name FROM paged_source WHERE keep = ? ORDER BY id;",
        &options,
        2,
    )
    .expect("paged chunk iterator")
    .collect::<Result<Vec<_>, _>>()
    .expect("all chunks");

    assert_eq!(chunks.len(), 3);
    assert_eq!(
        chunks[0].column("id").unwrap().values(),
        &[Scalar::Int64(1), Scalar::Int64(2)]
    );
    assert_eq!(
        chunks[1].column("id").unwrap().values(),
        &[Scalar::Int64(3), Scalar::Int64(4)]
    );
    assert_eq!(
        chunks[2].column("name").unwrap().values(),
        &[Scalar::Utf8("e".to_owned())]
    );

    // Expected call log: one zero-limit query first, then one query per page.
    let expected_params = vec![
        vec![Scalar::Int64(1), Scalar::Int64(0), Scalar::Int64(0)],
        vec![Scalar::Int64(1), Scalar::Int64(2), Scalar::Int64(0)],
        vec![Scalar::Int64(1), Scalar::Int64(2), Scalar::Int64(2)],
        vec![Scalar::Int64(1), Scalar::Int64(2), Scalar::Int64(4)],
    ];
    let recorded_params: Vec<Vec<Scalar>> = conn
        .queries
        .borrow()
        .iter()
        .map(|(_, params)| params.clone())
        .collect();
    assert_eq!(recorded_params, expected_params);
}
18750
/// The indexed, chunked query reader must apply `params`, `parse_dates` and
/// `coerce_float` before promoting `ts` to the index, so the index labels
/// carry the rewritten timestamp text.
#[cfg(feature = "sql-sqlite")]
#[test]
fn sql_read_query_chunks_with_options_and_index_col_applies_options_before_indexing() {
    let conn = make_sql_test_conn();
    conn.execute_batch(
        "CREATE TABLE query_index_events (ts TEXT, amount TEXT, keep INTEGER);
         INSERT INTO query_index_events (ts, amount, keep) VALUES
         ('2024-01-15', '12.50', 0),
         ('2024-02-01 05:06:07', '$1,234.50', 1),
         ('2024-03-03', '-3.25', 1);",
    )
    .expect("create query_index_events table");

    let sql = "SELECT ts, amount FROM query_index_events WHERE keep = ? ORDER BY ts";
    let options = SqlReadOptions {
        params: Some(vec![Scalar::Int64(1)]),
        parse_dates: Some(vec!["ts".to_owned()]),
        coerce_float: true,
        dtype: None,
        schema: None,
        columns: None,
        index_col: None,
    };

    let chunks =
        read_sql_query_chunks_with_options_and_index_col(&conn, sql, &options, Some("ts"), 1)
            .expect("indexed query chunk iterator")
            .collect::<Result<Vec<_>, _>>()
            .expect("all chunks");

    assert_eq!(chunks.len(), 2);
    let (first, second) = (&chunks[0], &chunks[1]);

    // The promoted column names the index and disappears from the data columns.
    assert_eq!(first.index().name(), Some("ts"));
    assert_eq!(
        first.index().labels(),
        &[IndexLabel::Utf8("2024-02-01 05:06:07".to_owned())]
    );
    assert!(first.column("ts").is_none());
    assert_eq!(
        first.column("amount").unwrap().values(),
        &[Scalar::Float64(1234.5)]
    );

    assert_eq!(
        second.index().labels(),
        &[IndexLabel::Utf8("2024-03-03 00:00:00".to_owned())]
    );
    assert_eq!(
        second.column("amount").unwrap().values(),
        &[Scalar::Float64(-3.25)]
    );
}
18803
/// With no index column from either the argument or the options, each chunk
/// keeps a range index starting at zero while `coerce_float` still applies.
#[cfg(feature = "sql-sqlite")]
#[test]
fn sql_read_chunks_with_options_and_index_col_none_keeps_options_and_range_index() {
    let conn = make_sql_test_conn();
    conn.execute_batch(
        "CREATE TABLE query_options_no_index (id INTEGER, amount TEXT);
         INSERT INTO query_options_no_index (id, amount) VALUES
         (1, '$10.50'),
         (2, '11.25');",
    )
    .expect("create query_options_no_index table");

    let options = SqlReadOptions {
        params: None,
        parse_dates: None,
        coerce_float: true,
        dtype: None,
        schema: None,
        columns: None,
        index_col: None,
    };

    let chunks = read_sql_chunks_with_options_and_index_col(
        &conn,
        "SELECT id, amount FROM query_options_no_index ORDER BY id",
        &options,
        None,
        1,
    )
    .expect("query chunk iterator")
    .collect::<Result<Vec<_>, _>>()
    .expect("all chunks");

    assert_eq!(chunks.len(), 2);
    let (first, second) = (&chunks[0], &chunks[1]);

    // Both single-row chunks are labelled 0: the index restarts per chunk.
    assert_eq!(first.index().labels(), &[IndexLabel::Int64(0)]);
    assert_eq!(second.index().labels(), &[IndexLabel::Int64(0)]);
    assert_eq!(
        first.column("amount").unwrap().values(),
        &[Scalar::Float64(10.5)]
    );
    assert_eq!(
        second.column("amount").unwrap().values(),
        &[Scalar::Float64(11.25)]
    );
}
18847
/// When the explicit `index_col` argument is `None`, the reader must fall back
/// to `options.index_col` and promote that column in every chunk.
#[cfg(feature = "sql-sqlite")]
#[test]
fn sql_read_chunks_with_options_and_index_col_uses_options_index_when_explicit_none() {
    let conn = make_sql_test_conn();
    conn.execute_batch(
        "CREATE TABLE query_options_struct_index (id INTEGER, amount TEXT);
         INSERT INTO query_options_struct_index (id, amount) VALUES
         (10, '$10.50'),
         (20, '11.25');",
    )
    .expect("create query_options_struct_index table");

    // The index column is named only inside the options struct.
    let options = SqlReadOptions {
        params: None,
        parse_dates: None,
        coerce_float: true,
        dtype: None,
        schema: None,
        columns: None,
        index_col: Some("id".to_owned()),
    };

    let chunks = read_sql_chunks_with_options_and_index_col(
        &conn,
        "SELECT id, amount FROM query_options_struct_index ORDER BY id",
        &options,
        None,
        1,
    )
    .expect("query chunk iterator")
    .collect::<Result<Vec<_>, _>>()
    .expect("all chunks");

    assert_eq!(chunks.len(), 2);
    let (first, second) = (&chunks[0], &chunks[1]);

    assert_eq!(first.index().name(), Some("id"));
    assert_eq!(first.index().labels(), &[IndexLabel::Int64(10)]);
    assert!(first.column("id").is_none());
    assert_eq!(second.index().labels(), &[IndexLabel::Int64(20)]);
    assert_eq!(
        second.column("amount").unwrap().values(),
        &[Scalar::Float64(11.25)]
    );
}
18889
/// Asking for an index column the query does not produce must fail while the
/// iterator is being built, with an `IoError::Sql` message naming `index_col`.
#[cfg(feature = "sql-sqlite")]
#[test]
fn sql_read_query_chunks_with_options_and_index_col_missing_column_errors() {
    let conn = make_sql_test_conn();
    conn.execute_batch(
        "CREATE TABLE query_options_missing_index (id INTEGER, amount TEXT);
         INSERT INTO query_options_missing_index (id, amount) VALUES (1, '10.5');",
    )
    .expect("create query_options_missing_index table");

    let options = SqlReadOptions {
        params: None,
        parse_dates: None,
        coerce_float: true,
        dtype: None,
        schema: None,
        columns: None,
        index_col: None,
    };

    // "missing" is not one of the selected columns (`id`, `amount`).
    let err = read_sql_query_chunks_with_options_and_index_col(
        &conn,
        "SELECT id, amount FROM query_options_missing_index",
        &options,
        Some("missing"),
        1,
    )
    .expect_err("missing index_col should error during iterator construction");

    assert!(matches!(err, IoError::Sql(msg) if msg.contains("index_col")));
}
18919
/// Whole-frame counterpart of the chunked test: options are applied first, then
/// `ts` is promoted to the index, so labels carry the rewritten timestamps.
#[cfg(feature = "sql-sqlite")]
#[test]
fn sql_read_query_with_options_and_index_col_applies_options_before_indexing() {
    let conn = make_sql_test_conn();
    conn.execute_batch(
        "CREATE TABLE query_frame_index_events (ts TEXT, amount TEXT, keep INTEGER);
         INSERT INTO query_frame_index_events (ts, amount, keep) VALUES
         ('2024-01-15', '12.50', 0),
         ('2024-02-01 05:06:07', '$1,234.50', 1),
         ('2024-03-03', '-3.25', 1);",
    )
    .expect("create query_frame_index_events table");

    let sql = "SELECT ts, amount FROM query_frame_index_events WHERE keep = ? ORDER BY ts";
    let options = SqlReadOptions {
        params: Some(vec![Scalar::Int64(1)]),
        parse_dates: Some(vec!["ts".to_owned()]),
        coerce_float: true,
        dtype: None,
        schema: None,
        columns: None,
        index_col: None,
    };

    let frame = read_sql_query_with_options_and_index_col(&conn, sql, &options, Some("ts"))
        .expect("read indexed query frame");

    let index = frame.index();
    assert_eq!(index.name(), Some("ts"));
    assert_eq!(
        index.labels(),
        &[
            IndexLabel::Utf8("2024-02-01 05:06:07".to_owned()),
            IndexLabel::Utf8("2024-03-03 00:00:00".to_owned())
        ]
    );
    // The promoted column is no longer available as a data column.
    assert!(frame.column("ts").is_none());
    assert_eq!(
        frame.column("amount").unwrap().values(),
        &[Scalar::Float64(1234.5), Scalar::Float64(-3.25)]
    );
}
18963
/// When both `options.index_col` and the explicit argument are given, the
/// explicit argument decides which column becomes the index.
#[cfg(feature = "sql-sqlite")]
#[test]
fn sql_read_query_with_options_and_index_col_explicit_arg_wins() {
    let conn = make_sql_test_conn();
    conn.execute_batch(
        "CREATE TABLE query_frame_index_override (a INTEGER, b INTEGER, val TEXT);
         INSERT INTO query_frame_index_override (a, b, val) VALUES
         (1, 100, 'x'),
         (2, 200, 'y');",
    )
    .expect("create query_frame_index_override table");

    // Options ask for `a`; the explicit argument below asks for `b`.
    let options = SqlReadOptions {
        index_col: Some("a".to_owned()),
        ..SqlReadOptions::default()
    };

    let frame = read_sql_query_with_options_and_index_col(
        &conn,
        "SELECT a, b, val FROM query_frame_index_override ORDER BY a",
        &options,
        Some("b"),
    )
    .expect("read indexed query frame with override");

    // `b` was consumed by the index, so `a` remains an ordinary column.
    assert_eq!(frame.column_names(), vec!["a", "val"]);
    assert_eq!(
        frame.index().labels(),
        &[IndexLabel::Int64(100), IndexLabel::Int64(200)]
    );
}
18993
/// A chunk size of zero must be refused with an `IoError::Sql` whose message
/// mentions `chunksize`.
#[cfg(feature = "sql-sqlite")]
#[test]
fn sql_read_query_chunks_rejects_zero_chunksize() {
    let conn = make_sql_test_conn();

    let outcome = read_sql_query_chunks(&conn, "SELECT 1", 0);
    let err = outcome.expect_err("zero query chunksize should be rejected");

    assert!(matches!(err, IoError::Sql(msg) if msg.contains("chunksize")));
}
19004
/// The option-less indexed query reader must promote `label` to the index in
/// every chunk: three rows at chunk size two give chunks of two and one rows.
#[cfg(feature = "sql-sqlite")]
#[test]
fn sql_read_query_chunks_with_index_col_promotes_each_chunk_index() {
    let conn = make_sql_test_conn();
    conn.execute_batch(
        "CREATE TABLE query_index_chunked (id INTEGER, label TEXT, value INTEGER);
         INSERT INTO query_index_chunked (id, label, value) VALUES
         (1, 'alpha', 10),
         (2, 'beta', 20),
         (3, 'gamma', 30);",
    )
    .expect("create query_index_chunked table");

    let sql = "SELECT id, label, value FROM query_index_chunked ORDER BY id";
    let chunks = read_sql_query_chunks_with_index_col(&conn, sql, Some("label"), 2)
        .expect("query indexed chunk iterator")
        .collect::<Result<Vec<_>, _>>()
        .expect("all chunks");

    assert_eq!(chunks.len(), 2);
    let (head, tail) = (&chunks[0], &chunks[1]);

    assert_eq!(head.index().name(), Some("label"));
    assert_eq!(
        head.index().labels(),
        &[
            IndexLabel::Utf8("alpha".to_owned()),
            IndexLabel::Utf8("beta".to_owned())
        ]
    );
    assert!(head.column("label").is_none());

    // The trailing partial chunk is indexed the same way.
    assert_eq!(
        tail.index().labels(),
        &[IndexLabel::Utf8("gamma".to_owned())]
    );
    assert_eq!(
        tail.column("value").unwrap().values(),
        &[Scalar::Int64(30)]
    );
}
19047
/// With `index_col` set to `None`, no column is promoted and each chunk gets
/// its own range index that starts again at zero.
#[cfg(feature = "sql-sqlite")]
#[test]
fn sql_read_chunks_with_index_col_none_keeps_fresh_chunk_range_indexes() {
    let conn = make_sql_test_conn();
    conn.execute_batch(
        "CREATE TABLE query_no_index_chunked (id INTEGER, label TEXT);
         INSERT INTO query_no_index_chunked (id, label) VALUES
         (1, 'alpha'),
         (2, 'beta');",
    )
    .expect("create query_no_index_chunked table");

    let sql = "SELECT id, label FROM query_no_index_chunked ORDER BY id";
    let chunks = read_sql_chunks_with_index_col(&conn, sql, None, 1)
        .expect("query chunk iterator")
        .collect::<Result<Vec<_>, _>>()
        .expect("all chunks");

    assert_eq!(chunks.len(), 2);
    let (first, second) = (&chunks[0], &chunks[1]);

    // Both chunks are labelled 0; the second does not continue from the first.
    assert_eq!(first.index().labels(), &[IndexLabel::Int64(0)]);
    assert_eq!(second.index().labels(), &[IndexLabel::Int64(0)]);
    assert_eq!(second.column_names(), vec!["id", "label"]);
}
19075
/// An unknown `index_col` must be reported when the chunk iterator is built,
/// as an `IoError::Sql` whose message mentions `index_col`.
#[cfg(feature = "sql-sqlite")]
#[test]
fn sql_read_query_chunks_with_index_col_missing_column_errors() {
    let conn = make_sql_test_conn();
    conn.execute_batch(
        "CREATE TABLE query_missing_index_chunked (id INTEGER, value INTEGER);
         INSERT INTO query_missing_index_chunked (id, value) VALUES (1, 10);",
    )
    .expect("create query_missing_index_chunked table");

    let sql = "SELECT id, value FROM query_missing_index_chunked";
    let err = read_sql_query_chunks_with_index_col(&conn, sql, Some("missing"), 1)
        .expect_err("missing index_col should error during iterator construction");

    assert!(matches!(err, IoError::Sql(msg) if msg.contains("index_col")));
}
19096
/// Reading a four-row table with a chunk size of three must yield a full chunk
/// of three rows followed by a one-row remainder.
#[cfg(feature = "sql-sqlite")]
#[test]
fn sql_read_table_chunks_batches_rows() {
    let conn = make_sql_test_conn();
    conn.execute_batch(
        "CREATE TABLE table_chunked (id INTEGER, name TEXT);
         INSERT INTO table_chunked (id, name) VALUES
         (1, 'alpha'),
         (2, 'beta'),
         (3, 'gamma'),
         (4, 'delta');",
    )
    .expect("create table_chunked table");

    let chunk_iter =
        read_sql_table_chunks(&conn, "table_chunked", 3).expect("table chunk iterator");
    let chunks = chunk_iter
        .collect::<Result<Vec<_>, _>>()
        .expect("all chunks");

    assert_eq!(chunks.len(), 2);
    let (full, remainder) = (&chunks[0], &chunks[1]);

    assert_eq!(full.column_names(), vec!["id", "name"]);
    assert_eq!(
        full.column("id").unwrap().values(),
        &[Scalar::Int64(1), Scalar::Int64(2), Scalar::Int64(3)]
    );
    assert_eq!(
        remainder.column("name").unwrap().values(),
        &[Scalar::Utf8("delta".to_owned())]
    );
}
19127
/// The table chunk reader must refuse a chunk size of zero, even for an
/// existing table, with an `IoError::Sql` mentioning `chunksize`.
#[cfg(feature = "sql-sqlite")]
#[test]
fn sql_read_table_chunks_rejects_zero_chunksize() {
    let conn = make_sql_test_conn();
    conn.execute_batch("CREATE TABLE zero_chunked (id INTEGER);")
        .expect("create zero_chunked table");

    let outcome = read_sql_table_chunks(&conn, "zero_chunked", 0);
    let err = outcome.expect_err("zero table chunksize should be rejected");

    assert!(matches!(err, IoError::Sql(msg) if msg.contains("chunksize")));
}
19140
/// A table name containing a space must be rejected with an `IoError::Sql`
/// whose message contains "invalid table name".
#[cfg(feature = "sql-sqlite")]
#[test]
fn sql_read_table_chunks_rejects_invalid_table_name() {
    let conn = make_sql_test_conn();

    let outcome = read_sql_table_chunks(&conn, "bad table", 1);
    let err = outcome.expect_err("invalid table name should be rejected");

    assert!(matches!(err, IoError::Sql(msg) if msg.contains("invalid table name")));
}
19151
/// The indexed table chunk reader must promote `id` to the index of each chunk
/// and drop it from the data columns.
#[cfg(feature = "sql-sqlite")]
#[test]
fn sql_read_table_chunks_with_index_col_promotes_each_chunk_index() {
    let conn = make_sql_test_conn();
    conn.execute_batch(
        "CREATE TABLE table_index_chunked (id INTEGER, name TEXT, score INTEGER);
         INSERT INTO table_index_chunked (id, name, score) VALUES
         (10, 'alpha', 100),
         (20, 'beta', 200),
         (30, 'gamma', 300);",
    )
    .expect("create table_index_chunked table");

    let chunk_iter =
        read_sql_table_chunks_with_index_col(&conn, "table_index_chunked", Some("id"), 2)
            .expect("table indexed chunk iterator");
    let chunks = chunk_iter
        .collect::<Result<Vec<_>, _>>()
        .expect("all chunks");

    assert_eq!(chunks.len(), 2);
    let (head, tail) = (&chunks[0], &chunks[1]);

    assert_eq!(head.index().name(), Some("id"));
    assert_eq!(
        head.index().labels(),
        &[IndexLabel::Int64(10), IndexLabel::Int64(20)]
    );
    assert!(head.column("id").is_none());
    assert_eq!(
        tail.column("score").unwrap().values(),
        &[Scalar::Int64(300)]
    );
}
19183
/// The whole-table reader must rewrite `parse_dates` columns and coerce
/// currency-style text to floats while leaving plain text columns untouched.
#[cfg(feature = "sql-sqlite")]
#[test]
fn sql_read_table_with_options_applies_parse_dates_and_coerce_float() {
    let conn = make_sql_test_conn();
    conn.execute_batch(
        "CREATE TABLE table_options (ts TEXT, amount TEXT, label TEXT);
         INSERT INTO table_options (ts, amount, label) VALUES
         ('2024-01-15', '$12.50', 'a'),
         ('2024-02-01 05:06:07', '1,234.50', 'b');",
    )
    .expect("create table_options table");

    let options = SqlReadOptions {
        params: None,
        parse_dates: Some(vec!["ts".to_owned()]),
        coerce_float: true,
        dtype: None,
        schema: None,
        columns: None,
        index_col: None,
    };

    let frame = read_sql_table_with_options(&conn, "table_options", &options)
        .expect("read table with options");

    // The date-only value gains a midnight time; the full timestamp is kept.
    assert_eq!(
        frame.column("ts").unwrap().values(),
        &[
            Scalar::Utf8("2024-01-15 00:00:00".to_owned()),
            Scalar::Utf8("2024-02-01 05:06:07".to_owned())
        ]
    );
    // "$12.50" and "1,234.50" both become plain floats.
    assert_eq!(
        frame.column("amount").unwrap().values(),
        &[Scalar::Float64(12.5), Scalar::Float64(1234.5)]
    );
    // Non-numeric text stays as text.
    assert_eq!(
        frame.column("label").unwrap().values(),
        &[Scalar::Utf8("a".to_owned()), Scalar::Utf8("b".to_owned())]
    );
}
19227
/// The chunked table reader must hand out chunks in which `parse_dates` and
/// `coerce_float` have already been applied: three rows at chunk size two.
#[cfg(feature = "sql-sqlite")]
#[test]
fn sql_read_table_chunks_with_options_applies_options_before_chunking() {
    let conn = make_sql_test_conn();
    conn.execute_batch(
        "CREATE TABLE table_options_chunked (ts TEXT, amount TEXT);
         INSERT INTO table_options_chunked (ts, amount) VALUES
         ('2024-03-01', '$10.00'),
         ('2024-03-02', '$20.50'),
         ('2024-03-03', '-3.25');",
    )
    .expect("create table_options_chunked table");

    let options = SqlReadOptions {
        params: None,
        parse_dates: Some(vec!["ts".to_owned()]),
        coerce_float: true,
        dtype: None,
        schema: None,
        columns: None,
        index_col: None,
    };

    let chunks =
        read_sql_table_chunks_with_options(&conn, "table_options_chunked", &options, 2)
            .expect("table option chunk iterator")
            .collect::<Result<Vec<_>, _>>()
            .expect("all chunks");

    assert_eq!(chunks.len(), 2);
    let (head, tail) = (&chunks[0], &chunks[1]);

    assert_eq!(
        head.column("ts").unwrap().values(),
        &[
            Scalar::Utf8("2024-03-01 00:00:00".to_owned()),
            Scalar::Utf8("2024-03-02 00:00:00".to_owned())
        ]
    );
    assert_eq!(
        head.column("amount").unwrap().values(),
        &[Scalar::Float64(10.0), Scalar::Float64(20.5)]
    );
    assert_eq!(
        tail.column("amount").unwrap().values(),
        &[Scalar::Float64(-3.25)]
    );
}
19276
/// Input validation for the option-taking table readers: a zero chunk size and
/// an invalid table name must each produce a descriptive `IoError::Sql`.
#[cfg(feature = "sql-sqlite")]
#[test]
fn sql_read_table_chunks_with_options_validates_chunksize_and_table_name() {
    let conn = make_sql_test_conn();
    conn.execute_batch(
        "CREATE TABLE table_options_errors (ts TEXT);
         INSERT INTO table_options_errors (ts) VALUES ('2024-01-01');",
    )
    .expect("create table_options_errors table");

    // Zero chunk size on the chunked reader.
    let default_options = SqlReadOptions::default();
    let zero =
        read_sql_table_chunks_with_options(&conn, "table_options_errors", &default_options, 0)
            .expect_err("zero chunksize should be rejected");
    assert!(matches!(zero, IoError::Sql(msg) if msg.contains("chunksize")));

    // NOTE(review): despite the test name, the invalid-name case goes through the
    // non-chunked `read_sql_table_with_options` — confirm that is intentional.
    let date_options = SqlReadOptions {
        parse_dates: Some(vec!["ts".to_owned()]),
        ..SqlReadOptions::default()
    };
    let invalid = read_sql_table_with_options(&conn, "bad table", &date_options)
        .expect_err("invalid table name should be rejected");
    assert!(matches!(invalid, IoError::Sql(msg) if msg.contains("invalid table name")));
}
19307
/// `options.index_col` is not accepted by the non-indexed chunked table reader:
/// it must return an error that names the `_and_index_col` sibling, and that
/// sibling must honor the same options.
#[cfg(feature = "sql-sqlite")]
#[test]
fn read_sql_table_chunks_with_options_rejects_options_index_col() {
    let conn = make_sql_test_conn();
    super::SqlConnection::execute_batch(
        &conn,
        "CREATE TABLE i8kja_table_chunks_reject (id INTEGER, val TEXT);",
    )
    .unwrap();
    super::SqlConnection::execute_batch(
        &conn,
        "INSERT INTO i8kja_table_chunks_reject VALUES (1, 'a'), (2, 'b');",
    )
    .unwrap();

    // The same options value is passed to both readers.
    let indexed_by_id = SqlReadOptions {
        index_col: Some("id".to_owned()),
        ..Default::default()
    };

    let err =
        read_sql_table_chunks_with_options(&conn, "i8kja_table_chunks_reject", &indexed_by_id, 2)
            .expect_err("options.index_col on non-indexed entrypoint must be rejected");
    assert!(
        matches!(&err, IoError::Sql(msg) if msg.contains("index_col") && msg.contains("read_sql_table_chunks_with_options_and_index_col")),
        "expected typed error pointing to the _and_index_col variant, got: {err:?}"
    );

    // The indexed sibling accepts the options and promotes `id` to the index.
    let chunks: Vec<DataFrame> = read_sql_table_chunks_with_options_and_index_col(
        &conn,
        "i8kja_table_chunks_reject",
        &indexed_by_id,
        None,
        2,
    )
    .expect("indexed sibling honors options.index_col")
    .collect::<Result<Vec<_>, _>>()
    .expect("all chunks");

    assert_eq!(chunks.len(), 1);
    let only = &chunks[0];
    assert!(only.column("id").is_none());
    assert_eq!(
        only.index().labels(),
        &[IndexLabel::Int64(1), IndexLabel::Int64(2)]
    );
    assert_eq!(
        only.column("val").unwrap().values(),
        &[Scalar::Utf8("a".into()), Scalar::Utf8("b".into())]
    );
}
19370
/// Both non-indexed chunked query readers must reject `options.index_col`, and
/// each error must name the matching `_and_index_col` variant of that reader.
#[cfg(feature = "sql-sqlite")]
#[test]
fn read_sql_chunks_with_options_rejects_options_index_col() {
    let conn = make_sql_test_conn();
    super::SqlConnection::execute_batch(
        &conn,
        "CREATE TABLE i8kja_query_chunks_reject (id INTEGER, val TEXT);",
    )
    .unwrap();
    super::SqlConnection::execute_batch(
        &conn,
        "INSERT INTO i8kja_query_chunks_reject VALUES (1, 'a');",
    )
    .unwrap();

    let sql = "SELECT * FROM i8kja_query_chunks_reject";
    let indexed_by_id = SqlReadOptions {
        index_col: Some("id".to_owned()),
        ..Default::default()
    };

    // Generic reader: the message points at the generic indexed sibling.
    let err = read_sql_chunks_with_options(&conn, sql, &indexed_by_id, 2)
        .expect_err("options.index_col on non-indexed entrypoint must be rejected");
    assert!(
        matches!(&err, IoError::Sql(msg) if msg.contains("index_col") && msg.contains("read_sql_chunks_with_options_and_index_col")),
        "expected typed error pointing to the _and_index_col variant, got: {err:?}"
    );

    // Query-named reader: the message points at the query-specific sibling.
    let err = read_sql_query_chunks_with_options(&conn, sql, &indexed_by_id, 2)
        .expect_err("query delegator should propagate the rejection");
    assert!(
        matches!(&err, IoError::Sql(msg) if msg.contains("index_col") && msg.contains("read_sql_query_chunks_with_options_and_index_col")),
        "expected query-specific _and_index_col suggestion, got: {err:?}"
    );
}
19416
/// Every query-based entry point must reject `options.columns` and name the
/// table-based reader the caller should use instead.
///
/// Seven readers are exercised against the same query and the same options.
#[cfg(feature = "sql-sqlite")]
#[test]
fn read_sql_with_options_rejects_options_columns_across_query_entrypoints() {
    /// Checks that `err` is an `IoError::Sql` mentioning both `options.columns`
    /// and the suggested sibling function.
    fn assert_columns_rejection(err: &IoError, expected_sibling: &str) {
        assert!(
            matches!(err, IoError::Sql(msg)
                if msg.contains("options.columns") && msg.contains(expected_sibling)),
            "expected options.columns error pointing to `{expected_sibling}`, got: {err:?}"
        );
    }

    let conn = make_sql_test_conn();
    super::SqlConnection::execute_batch(
        &conn,
        "CREATE TABLE t1777_query_cols_reject (id INTEGER, val TEXT);",
    )
    .unwrap();
    super::SqlConnection::execute_batch(
        &conn,
        "INSERT INTO t1777_query_cols_reject VALUES (1, 'a'), (2, 'b');",
    )
    .unwrap();

    let sql = "SELECT id, val FROM t1777_query_cols_reject";
    // A fresh options value per call, selecting only `id`.
    let columns_only = || SqlReadOptions {
        columns: Some(vec!["id".to_owned()]),
        ..Default::default()
    };

    // Whole-frame generic reader.
    let err = read_sql_with_options(&conn, sql, &columns_only())
        .expect_err("read_sql_with_options must reject options.columns");
    assert_columns_rejection(&err, "read_sql_table_with_options");

    // Chunked generic reader.
    let err = read_sql_chunks_with_options(&conn, sql, &columns_only(), 2)
        .expect_err("read_sql_chunks_with_options must reject options.columns");
    assert_columns_rejection(&err, "read_sql_table_chunks_with_options");

    // Chunked generic reader with an index column.
    let err =
        read_sql_chunks_with_options_and_index_col(&conn, sql, &columns_only(), Some("id"), 2)
            .expect_err("indexed chunks must reject options.columns");
    assert_columns_rejection(&err, "read_sql_table_chunks_with_options_and_index_col");

    // Whole-frame query reader.
    let err = read_sql_query_with_options(&conn, sql, &columns_only())
        .expect_err("read_sql_query_with_options must propagate the rejection");
    assert_columns_rejection(&err, "read_sql_table_with_options");

    // Whole-frame query reader with an index column.
    let err = read_sql_query_with_options_and_index_col(&conn, sql, &columns_only(), Some("id"))
        .expect_err("indexed query reader must reject options.columns");
    assert_columns_rejection(&err, "read_sql_table_with_options");

    // Chunked query reader.
    let err = read_sql_query_chunks_with_options(&conn, sql, &columns_only(), 2)
        .expect_err("query chunks delegator must reject options.columns");
    assert_columns_rejection(&err, "read_sql_table_chunks_with_options");

    // Chunked query reader with an index column.
    let err = read_sql_query_chunks_with_options_and_index_col(
        &conn,
        sql,
        &columns_only(),
        Some("id"),
        2,
    )
    .expect_err("indexed query chunks delegator must reject options.columns");
    assert_columns_rejection(&err, "read_sql_table_chunks_with_options_and_index_col");
}
19520
/// The four table-based readers must keep honoring `options.columns`: only the
/// requested columns appear and the `secret` column never leaks through.
#[cfg(feature = "sql-sqlite")]
#[test]
fn read_sql_table_with_options_still_honors_options_columns_after_t1777() {
    let conn = make_sql_test_conn();
    super::SqlConnection::execute_batch(
        &conn,
        "CREATE TABLE t1777_table_cols_honor (id INTEGER, val TEXT, secret TEXT);",
    )
    .unwrap();
    super::SqlConnection::execute_batch(
        &conn,
        "INSERT INTO t1777_table_cols_honor VALUES (1, 'a', 'x'), (2, 'b', 'y');",
    )
    .unwrap();

    let table = "t1777_table_cols_honor";
    // Projection for the non-indexed readers.
    let id_and_val = SqlReadOptions {
        columns: Some(vec!["id".to_owned(), "val".to_owned()]),
        ..Default::default()
    };
    // Projection for the indexed readers; `id` is named as the index separately.
    let val_only = SqlReadOptions {
        columns: Some(vec!["val".to_owned()]),
        ..Default::default()
    };

    // Whole-frame reader.
    let plain_frame = read_sql_table_with_options(&conn, table, &id_and_val)
        .expect("table reader honors options.columns");
    assert_eq!(plain_frame.column_names(), vec!["id", "val"]);
    assert!(plain_frame.column("secret").is_none());

    // Chunked reader: two rows at chunk size one.
    let plain_chunks: Vec<DataFrame> =
        read_sql_table_chunks_with_options(&conn, table, &id_and_val, 1)
            .expect("chunked table reader honors options.columns")
            .collect::<Result<Vec<_>, _>>()
            .expect("all chunks");
    assert_eq!(plain_chunks.len(), 2);
    for chunk in &plain_chunks {
        assert_eq!(chunk.column_names(), vec!["id", "val"]);
        assert!(chunk.column("secret").is_none());
    }

    // Indexed whole-frame reader.
    let indexed_frame =
        read_sql_table_with_options_and_index_col(&conn, table, &val_only, Some("id"))
            .expect("indexed table reader honors options.columns");
    assert_eq!(indexed_frame.index().name(), Some("id"));
    assert_eq!(indexed_frame.column_names(), vec!["val"]);
    assert!(indexed_frame.column("id").is_none());
    assert!(indexed_frame.column("secret").is_none());

    // Indexed chunked reader.
    let indexed_chunks: Vec<DataFrame> =
        read_sql_table_chunks_with_options_and_index_col(&conn, table, &val_only, Some("id"), 1)
            .expect("indexed chunked table reader honors options.columns")
            .collect::<Result<Vec<_>, _>>()
            .expect("all chunks");
    assert_eq!(indexed_chunks.len(), 2);
    for chunk in &indexed_chunks {
        assert_eq!(chunk.column_names(), vec!["val"]);
        assert!(chunk.column("id").is_none());
        assert!(chunk.column("secret").is_none());
    }
}
19609
/// The indexed table reader must apply `parse_dates` and `coerce_float` before
/// promoting `ts`, so the index labels carry the rewritten timestamp text.
#[cfg(feature = "sql-sqlite")]
#[test]
fn sql_read_table_with_options_and_index_col_applies_options_before_indexing() {
    let conn = make_sql_test_conn();
    conn.execute_batch(
        "CREATE TABLE table_options_index (ts TEXT, amount TEXT, label TEXT);
         INSERT INTO table_options_index (ts, amount, label) VALUES
         ('2024-04-01', '$10.00', 'a'),
         ('2024-04-02 03:04:05', '20.50', 'b');",
    )
    .expect("create table_options_index table");

    let options = SqlReadOptions {
        params: None,
        parse_dates: Some(vec!["ts".to_owned()]),
        coerce_float: true,
        dtype: None,
        schema: None,
        columns: None,
        index_col: None,
    };

    let frame = read_sql_table_with_options_and_index_col(
        &conn,
        "table_options_index",
        &options,
        Some("ts"),
    )
    .expect("read table with options and index_col");

    let index = frame.index();
    assert_eq!(index.name(), Some("ts"));
    assert_eq!(
        index.labels(),
        &[
            IndexLabel::Utf8("2024-04-01 00:00:00".to_owned()),
            IndexLabel::Utf8("2024-04-02 03:04:05".to_owned())
        ]
    );
    // The promoted column is gone from the data columns.
    assert!(frame.column("ts").is_none());
    assert_eq!(
        frame.column("amount").unwrap().values(),
        &[Scalar::Float64(10.0), Scalar::Float64(20.5)]
    );
}
19652
/// With no index column from either source, the indexed table reader keeps a
/// 0-based range index and every column, while `coerce_float` still applies.
#[cfg(feature = "sql-sqlite")]
#[test]
fn sql_read_table_with_options_and_index_col_none_keeps_options_and_range_index() {
    let conn = make_sql_test_conn();
    conn.execute_batch(
        "CREATE TABLE table_options_no_index (id INTEGER, amount TEXT);
         INSERT INTO table_options_no_index (id, amount) VALUES
         (1, '$1.25'),
         (2, '$2.50');",
    )
    .expect("create table_options_no_index table");

    let options = SqlReadOptions {
        params: None,
        parse_dates: None,
        coerce_float: true,
        dtype: None,
        schema: None,
        columns: None,
        index_col: None,
    };

    let frame =
        read_sql_table_with_options_and_index_col(&conn, "table_options_no_index", &options, None)
            .expect("read table with options and no index_col");

    // Nothing was promoted: range labels 0 and 1, and both columns present.
    assert_eq!(
        frame.index().labels(),
        &[IndexLabel::Int64(0), IndexLabel::Int64(1)]
    );
    assert_eq!(frame.column_names(), vec!["id", "amount"]);
    assert_eq!(
        frame.column("amount").unwrap().values(),
        &[Scalar::Float64(1.25), Scalar::Float64(2.5)]
    );
}
19691
/// Chunked table read (chunksize 2 over three rows) with an explicit index
/// column: each chunk has `ts` promoted to its index after date parsing and
/// `amount` coerced to floats.
#[cfg(feature = "sql-sqlite")]
#[test]
fn sql_read_table_chunks_with_options_and_index_col_promotes_each_chunk_index() {
    let conn = make_sql_test_conn();
    conn.execute_batch(
        "CREATE TABLE table_options_index_chunked (ts TEXT, amount TEXT);
         INSERT INTO table_options_index_chunked (ts, amount) VALUES
            ('2024-05-01', '$10.00'),
            ('2024-05-02', '$20.00'),
            ('2024-05-03', '$30.50');",
    )
    .expect("create table_options_index_chunked table");

    let options = SqlReadOptions {
        params: None,
        parse_dates: Some(vec!["ts".to_owned()]),
        coerce_float: true,
        dtype: None,
        schema: None,
        columns: None,
        index_col: None,
    };
    let chunks = read_sql_table_chunks_with_options_and_index_col(
        &conn,
        "table_options_index_chunked",
        &options,
        Some("ts"),
        2,
    )
    .expect("table indexed option chunk iterator")
    .collect::<Result<Vec<_>, _>>()
    .expect("all chunks");

    assert_eq!(chunks.len(), 2);
    let head = &chunks[0];
    let tail = &chunks[1];

    // First chunk: two rows, index promoted from the parsed `ts` column.
    let head_index = head.index();
    assert_eq!(head_index.name(), Some("ts"));
    let head_labels = [
        IndexLabel::Utf8("2024-05-01 00:00:00".to_owned()),
        IndexLabel::Utf8("2024-05-02 00:00:00".to_owned()),
    ];
    assert_eq!(head_index.labels(), &head_labels);
    assert!(head.column("ts").is_none());
    let head_amount = head.column("amount").unwrap();
    assert_eq!(
        head_amount.values(),
        &[Scalar::Float64(10.0), Scalar::Float64(20.0)]
    );

    // Second chunk: the single remaining row.
    let tail_index = tail.index();
    assert_eq!(
        tail_index.labels(),
        &[IndexLabel::Utf8("2024-05-03 00:00:00".to_owned())]
    );
    let tail_amount = tail.column("amount").unwrap();
    assert_eq!(tail_amount.values(), &[Scalar::Float64(30.5)]);
}
19747
/// When the explicit `index_col` argument is `None`, the `index_col` carried
/// by the options struct (`id` here) is the one promoted in every chunk.
#[cfg(feature = "sql-sqlite")]
#[test]
fn sql_read_table_chunks_with_options_and_index_col_uses_options_index_when_explicit_none() {
    let conn = make_sql_test_conn();
    conn.execute_batch(
        "CREATE TABLE table_options_struct_index (id INTEGER, amount TEXT);
         INSERT INTO table_options_struct_index (id, amount) VALUES
            (10, '$10.00'),
            (20, '$20.00'),
            (30, '$30.50');",
    )
    .expect("create table_options_struct_index table");

    let options = SqlReadOptions {
        params: None,
        parse_dates: None,
        coerce_float: true,
        dtype: None,
        schema: None,
        columns: None,
        index_col: Some("id".to_owned()),
    };
    let chunks = read_sql_table_chunks_with_options_and_index_col(
        &conn,
        "table_options_struct_index",
        &options,
        None,
        2,
    )
    .expect("table indexed option chunk iterator")
    .collect::<Result<Vec<_>, _>>()
    .expect("all chunks");

    assert_eq!(chunks.len(), 2);
    let head = &chunks[0];
    let tail = &chunks[1];

    let head_index = head.index();
    assert_eq!(
        head_index.labels(),
        &[IndexLabel::Int64(10), IndexLabel::Int64(20)]
    );
    assert!(head.column("id").is_none());

    let tail_index = tail.index();
    assert_eq!(tail_index.labels(), &[IndexLabel::Int64(30)]);
    let tail_amount = tail.column("amount").unwrap();
    assert_eq!(tail_amount.values(), &[Scalar::Float64(30.5)]);
}
19792
/// Requesting an index column that the table does not have must fail when the
/// chunk iterator is constructed, with an `IoError::Sql` mentioning `index_col`.
#[cfg(feature = "sql-sqlite")]
#[test]
fn sql_read_table_chunks_with_options_and_index_col_missing_column_errors() {
    let conn = make_sql_test_conn();
    conn.execute_batch(
        "CREATE TABLE table_options_missing_index (id INTEGER, amount TEXT);
         INSERT INTO table_options_missing_index (id, amount) VALUES (1, '$10.00');",
    )
    .expect("create table_options_missing_index table");

    let options = SqlReadOptions {
        params: None,
        parse_dates: None,
        coerce_float: true,
        dtype: None,
        schema: None,
        columns: None,
        index_col: None,
    };
    let construction = read_sql_table_chunks_with_options_and_index_col(
        &conn,
        "table_options_missing_index",
        &options,
        Some("missing"),
        1,
    );
    let err =
        construction.expect_err("missing index_col should error during iterator construction");

    assert!(matches!(err, IoError::Sql(msg) if msg.contains("index_col")));
}
19822
/// `parse_dates` on `ts` rewrites both a date-only and a date-time text value
/// into full `YYYY-MM-DD HH:MM:SS` strings; the integer column is untouched.
#[cfg(feature = "sql-sqlite")]
#[test]
fn sql_read_with_parse_dates_coerces_named_columns() {
    let conn = make_sql_test_conn();
    conn.execute_batch(
        "CREATE TABLE events (ts TEXT, value INTEGER);
         INSERT INTO events (ts, value) VALUES
            ('2024-01-15', 1),
            ('2024-02-01 05:06:07', 2);",
    )
    .expect("create events table");

    let options = SqlReadOptions {
        params: None,
        parse_dates: Some(vec!["ts".to_owned()]),
        coerce_float: false,
        dtype: None,
        schema: None,
        columns: None,
        index_col: None,
    };
    let frame = read_sql_with_options(
        &conn,
        "SELECT ts, value FROM events ORDER BY value",
        &options,
    )
    .expect("read sql with parse_dates");

    let ts = frame.column("ts").unwrap();
    assert_eq!(ts.values()[0], Scalar::Utf8("2024-01-15 00:00:00".into()));
    assert_eq!(ts.values()[1], Scalar::Utf8("2024-02-01 05:06:07".into()));

    let value = frame.column("value").unwrap();
    assert_eq!(value.values()[0], Scalar::Int64(1));
    assert_eq!(value.values()[1], Scalar::Int64(2));
}
19861
/// Naming a `parse_dates` column that the query does not return must yield
/// `IoError::MissingParseDateColumns` listing exactly that column.
#[cfg(feature = "sql-sqlite")]
#[test]
fn sql_read_with_parse_dates_missing_column_errors() {
    let conn = make_sql_test_conn();
    conn.execute_batch(
        "CREATE TABLE metrics (value INTEGER);
         INSERT INTO metrics (value) VALUES (1);",
    )
    .expect("create metrics table");

    let options = SqlReadOptions {
        params: None,
        parse_dates: Some(vec!["ts".to_owned()]),
        coerce_float: false,
        dtype: None,
        schema: None,
        columns: None,
        index_col: None,
    };
    let outcome = read_sql_with_options(&conn, "SELECT value FROM metrics", &options);
    let err = outcome.expect_err("missing parse_dates column should error");

    assert!(
        matches!(err, IoError::MissingParseDateColumns(missing) if missing == vec!["ts".to_owned()])
    );
}
19891
/// Two positional `?` placeholders are bound from `params` in order; the
/// filter is expected to leave a single row (`ints = 30`, `names = carol`).
#[cfg(feature = "sql-sqlite")]
#[test]
fn sql_read_with_params_binds_positional_placeholders() {
    let conn = make_sql_test_conn();
    let frame = make_test_dataframe();
    write_sql(&frame, &conn, "data", SqlIfExists::Fail).unwrap();

    let options = SqlReadOptions {
        params: Some(vec![Scalar::Int64(15), Scalar::Utf8("bob".to_owned())]),
        parse_dates: None,
        coerce_float: false,
        dtype: None,
        schema: None,
        columns: None,
        index_col: None,
    };
    let filtered = read_sql_with_options(
        &conn,
        "SELECT ints, names FROM data WHERE ints > ? AND names != ? ORDER BY ints",
        &options,
    )
    .expect("read sql with params");

    assert_eq!(filtered.index().len(), 1);
    let ints = filtered.column("ints").unwrap();
    assert_eq!(ints.values(), &[Scalar::Int64(30)]);
    let names = filtered.column("names").unwrap();
    assert_eq!(names.values(), &[Scalar::Utf8("carol".into())]);
}
19924
/// Supplying one parameter for a query with two placeholders must fail with an
/// `IoError::Sql` whose message mentions "parameter".
#[cfg(feature = "sql-sqlite")]
#[test]
fn sql_read_with_params_wrong_arity_errors() {
    let conn = make_sql_test_conn();
    let frame = make_test_dataframe();
    write_sql(&frame, &conn, "data", SqlIfExists::Fail).unwrap();

    let options = SqlReadOptions {
        params: Some(vec![Scalar::Int64(15)]),
        parse_dates: None,
        coerce_float: false,
        dtype: None,
        schema: None,
        columns: None,
        index_col: None,
    };
    let outcome = read_sql_with_options(
        &conn,
        "SELECT ints FROM data WHERE ints > ? AND names != ?",
        &options,
    );
    let err = outcome.expect_err("wrong arity should error");

    assert!(matches!(err, IoError::Sql(msg) if msg.contains("parameter")));
}
19949
/// Exercises `coerce_float` three ways on decimal-looking TEXT columns:
/// the plain `read_sql` call (expected to yield floats), an explicit
/// `coerce_float: false` (text preserved), and an explicit `coerce_float: true`
/// (floats, with currency formatting stripped and SQL NULL becoming NaN).
#[cfg(feature = "sql-sqlite")]
#[test]
fn sql_read_coerce_float_promotes_decimal_like_text_columns() {
    let conn = make_sql_test_conn();
    conn.execute_batch(
        "CREATE TABLE payments (id INTEGER, amount TEXT, fee TEXT);
         INSERT INTO payments (id, amount, fee) VALUES
            (1, '12.50', '$1,234.50'),
            (2, '-3.25', NULL);",
    )
    .expect("create payments table");

    let amount_query = "SELECT amount FROM payments ORDER BY id";

    // Plain read_sql, no explicit options.
    let default_frame = read_sql(&conn, amount_query).expect("default read");
    let default_amount = default_frame.column("amount").unwrap();
    assert_eq!(default_amount.dtype(), DType::Float64);
    assert_eq!(
        default_amount.values(),
        &[Scalar::Float64(12.5), Scalar::Float64(-3.25)]
    );

    // Coercion switched off: the raw text survives.
    let text_options = SqlReadOptions {
        coerce_float: false,
        ..SqlReadOptions::default()
    };
    let no_coerce = read_sql_with_options(&conn, amount_query, &text_options)
        .expect("read without coerce_float");
    let raw_amount = no_coerce.column("amount").unwrap();
    assert_eq!(
        raw_amount.values(),
        &[
            Scalar::Utf8("12.50".to_owned()),
            Scalar::Utf8("-3.25".to_owned()),
        ]
    );

    // Coercion switched on explicitly, including the currency-formatted column.
    let float_options = SqlReadOptions {
        coerce_float: true,
        ..SqlReadOptions::default()
    };
    let coerced = read_sql_with_options(
        &conn,
        "SELECT amount, fee FROM payments ORDER BY id",
        &float_options,
    )
    .expect("read with coerce_float");

    let amount = coerced.column("amount").expect("amount");
    assert_eq!(amount.dtype(), DType::Float64);
    assert_eq!(
        amount.values(),
        &[Scalar::Float64(12.5), Scalar::Float64(-3.25)]
    );

    let fee = coerced.column("fee").expect("fee");
    assert_eq!(fee.dtype(), DType::Float64);
    assert_eq!(fee.values()[0], Scalar::Float64(1234.5));
    assert!(matches!(fee.values()[1], Scalar::Null(NullKind::NaN)));
}
20016
/// A TEXT column containing any non-numeric entry must stay text under
/// `coerce_float`, even when some of its values look like numbers.
#[cfg(feature = "sql-sqlite")]
#[test]
fn sql_read_coerce_float_leaves_non_numeric_text_columns_unchanged() {
    let conn = make_sql_test_conn();
    conn.execute_batch(
        "CREATE TABLE mixed (id INTEGER, maybe_amount TEXT, label TEXT);
         INSERT INTO mixed (id, maybe_amount, label) VALUES
            (1, '12.50', 'alpha'),
            (2, 'not numeric', '20.0');",
    )
    .expect("create mixed table");

    let options = SqlReadOptions {
        coerce_float: true,
        dtype: None,
        schema: None,
        columns: None,
        index_col: None,
        ..SqlReadOptions::default()
    };
    let frame = read_sql_with_options(
        &conn,
        "SELECT maybe_amount, label FROM mixed ORDER BY id",
        &options,
    )
    .expect("read with coerce_float");

    let maybe_amount = frame.column("maybe_amount").unwrap();
    assert_eq!(
        maybe_amount.values(),
        &[
            Scalar::Utf8("12.50".to_owned()),
            Scalar::Utf8("not numeric".to_owned()),
        ]
    );

    let label = frame.column("label").unwrap();
    assert_eq!(
        label.values(),
        &[
            Scalar::Utf8("alpha".to_owned()),
            Scalar::Utf8("20.0".to_owned()),
        ]
    );
}
20058
/// Five rows read with chunksize 2 must produce three chunks (2, 2, 1 rows),
/// each with its own index restarting at 0.
#[cfg(feature = "sql-sqlite")]
#[test]
fn sql_read_chunks_batches_rows_and_resets_index_per_chunk() {
    let conn = make_sql_test_conn();
    conn.execute_batch(
        "CREATE TABLE chunked (id INTEGER, name TEXT);
         INSERT INTO chunked (id, name) VALUES
            (1, 'alpha'),
            (2, 'beta'),
            (3, 'gamma'),
            (4, 'delta'),
            (5, 'epsilon');",
    )
    .expect("create chunked table");

    let reader = read_sql_chunks(&conn, "SELECT id, name FROM chunked ORDER BY id", 2)
        .expect("chunk iterator");
    let chunks = reader.collect::<Result<Vec<_>, _>>().expect("all chunks");

    assert_eq!(chunks.len(), 3);

    // Index labels restart from zero in every chunk.
    let full_range = [IndexLabel::Int64(0), IndexLabel::Int64(1)];
    assert_eq!(chunks[0].index().labels(), &full_range);
    assert_eq!(chunks[1].index().labels(), &full_range);
    assert_eq!(chunks[2].index().labels(), &[IndexLabel::Int64(0)]);

    // Row contents follow the query order across chunk boundaries.
    let first_ids = chunks[0].column("id").unwrap();
    assert_eq!(first_ids.values(), &[Scalar::Int64(1), Scalar::Int64(2)]);
    let second_ids = chunks[1].column("id").unwrap();
    assert_eq!(second_ids.values(), &[Scalar::Int64(3), Scalar::Int64(4)]);
    let last_names = chunks[2].column("name").unwrap();
    assert_eq!(last_names.values(), &[Scalar::Utf8("epsilon".to_owned())]);
}
20102
/// Inspects the chunk iterator's `Debug` output: it must report paged mode
/// with `next_offset: 0` before any read, and `next_offset: 1` after the first
/// single-row chunk has been pulled.
#[cfg(feature = "sql-sqlite")]
#[test]
fn sql_read_chunks_sqlite_uses_paged_iterator_state() {
    let conn = make_sql_test_conn();
    conn.execute_batch(
        "CREATE TABLE fp_sqlite_paged_chunks (id INTEGER, name TEXT);
         INSERT INTO fp_sqlite_paged_chunks (id, name) VALUES
            (1, 'alpha'),
            (2, 'beta');",
    )
    .expect("create sqlite_paged_chunks table");

    let query = "SELECT id, name FROM fp_sqlite_paged_chunks ORDER BY id";
    let mut chunks = read_sql_chunks(&conn, query, 1).expect("chunk iterator");

    // State before any chunk has been consumed.
    let initial_debug = format!("{chunks:?}");
    assert!(
        initial_debug.contains("mode: \"paged\""),
        "SQLite chunk reads must use paged mode, got {initial_debug}"
    );
    assert!(initial_debug.contains("next_offset: 0"));

    let first = chunks
        .next()
        .expect("first chunk")
        .expect("first chunk should read");
    let first_ids = first.column("id").unwrap();
    assert_eq!(first_ids.values(), &[Scalar::Int64(1)]);

    // State after one chunk of size one.
    let after_first_debug = format!("{chunks:?}");
    assert!(after_first_debug.contains("next_offset: 1"));
}
20138
/// Chunked query read combining a bound parameter (`keep = 1`), `parse_dates`
/// on `ts`, and `coerce_float`: the two matching rows arrive as two one-row
/// chunks with parsed timestamps and float amounts.
#[cfg(feature = "sql-sqlite")]
#[test]
fn sql_read_chunks_with_options_applies_params_parse_dates_and_coerce_float() {
    let conn = make_sql_test_conn();
    conn.execute_batch(
        "CREATE TABLE events (ts TEXT, amount TEXT, keep INTEGER);
         INSERT INTO events (ts, amount, keep) VALUES
            ('2024-01-15', '12.50', 0),
            ('2024-02-01 05:06:07', '$1,234.50', 1),
            ('2024-03-03', '-3.25', 1);",
    )
    .expect("create events table");

    let options = SqlReadOptions {
        params: Some(vec![Scalar::Int64(1)]),
        parse_dates: Some(vec!["ts".to_owned()]),
        coerce_float: true,
        dtype: None,
        schema: None,
        columns: None,
        index_col: None,
    };
    let chunks = read_sql_chunks_with_options(
        &conn,
        "SELECT ts, amount FROM events WHERE keep = ? ORDER BY ts",
        &options,
        1,
    )
    .expect("chunk iterator")
    .collect::<Result<Vec<_>, _>>()
    .expect("all chunks");

    assert_eq!(chunks.len(), 2);

    let first_ts = chunks[0].column("ts").unwrap();
    assert_eq!(
        first_ts.values(),
        &[Scalar::Utf8("2024-02-01 05:06:07".to_owned())]
    );
    let first_amount = chunks[0].column("amount").unwrap();
    assert_eq!(first_amount.values(), &[Scalar::Float64(1234.5)]);

    let second_ts = chunks[1].column("ts").unwrap();
    assert_eq!(
        second_ts.values(),
        &[Scalar::Utf8("2024-03-03 00:00:00".to_owned())]
    );
    let second_amount = chunks[1].column("amount").unwrap();
    assert_eq!(second_amount.values(), &[Scalar::Float64(-3.25)]);
}
20188
/// A chunksize of zero must be refused with an `IoError::Sql` that mentions
/// "chunksize".
#[cfg(feature = "sql-sqlite")]
#[test]
fn sql_read_chunks_rejects_zero_chunksize() {
    let conn = make_sql_test_conn();

    let outcome = read_sql_chunks(&conn, "SELECT 1", 0);
    let err = outcome.expect_err("zero chunksize should be rejected");

    assert!(matches!(err, IoError::Sql(msg) if msg.contains("chunksize")));
}
20199
/// A query returning two columns with the same alias must be rejected with
/// `IoError::DuplicateColumnName` naming that alias.
#[cfg(feature = "sql-sqlite")]
#[test]
fn sql_duplicate_column_names_error() {
    let conn = make_sql_test_conn();
    let outcome = read_sql(&conn, "SELECT 1 as dup, 2 as dup");
    assert!(matches!(outcome, Err(IoError::DuplicateColumnName(name)) if name == "dup"));
}
20207
/// Writing to an existing table with `SqlIfExists::Fail` must return an
/// `IoError::Sql` whose message contains "already exists".
#[cfg(feature = "sql-sqlite")]
#[test]
fn sql_if_exists_fail() {
    let conn = make_sql_test_conn();
    let frame = make_test_dataframe();
    write_sql(&frame, &conn, "tbl", SqlIfExists::Fail).unwrap();

    // Second write targets the table created above.
    let second_write = write_sql(&frame, &conn, "tbl", SqlIfExists::Fail);
    assert!(second_write.is_err());
    let err = second_write.unwrap_err();
    assert!(matches!(&err, IoError::Sql(msg) if msg.contains("already exists")));
}
20219
/// Writing the same frame twice, the second time with `SqlIfExists::Replace`,
/// must leave a table with three rows rather than six.
#[cfg(feature = "sql-sqlite")]
#[test]
fn sql_if_exists_replace() {
    let conn = make_sql_test_conn();
    let frame = make_test_dataframe();

    // Create the table first, then overwrite it.
    for mode in [SqlIfExists::Fail, SqlIfExists::Replace] {
        write_sql(&frame, &conn, "tbl", mode).unwrap();
    }

    let stored = read_sql_table(&conn, "tbl").unwrap();
    assert_eq!(stored.index().len(), 3);
}
20232
/// Writing the same frame twice, the second time with `SqlIfExists::Append`,
/// must leave a table with six rows.
#[cfg(feature = "sql-sqlite")]
#[test]
fn sql_if_exists_append() {
    let conn = make_sql_test_conn();
    let frame = make_test_dataframe();

    // Create the table first, then append the same rows again.
    for mode in [SqlIfExists::Fail, SqlIfExists::Append] {
        write_sql(&frame, &conn, "tbl", mode).unwrap();
    }

    let stored = read_sql_table(&conn, "tbl").unwrap();
    assert_eq!(stored.index().len(), 6);
}
20245
/// Round-trips a float column containing a NaN-kind null through SQL: the
/// surrounding values must survive and the middle entry must read back as
/// missing.
#[cfg(feature = "sql-sqlite")]
#[test]
fn sql_with_nulls() {
    let vals = Column::new(
        DType::Float64,
        vec![
            Scalar::Float64(1.0),
            Scalar::Null(NullKind::NaN),
            Scalar::Float64(3.0),
        ],
    )
    .unwrap();
    let columns = BTreeMap::from([("vals".to_string(), vals)]);
    let labels: Vec<IndexLabel> = (0..3).map(IndexLabel::Int64).collect();
    let frame =
        DataFrame::new_with_column_order(Index::new(labels), columns, vec!["vals".to_string()])
            .unwrap();

    let conn = make_sql_test_conn();
    write_sql(&frame, &conn, "nulltest", SqlIfExists::Fail).unwrap();
    let roundtrip = read_sql_table(&conn, "nulltest").unwrap();

    let stored = roundtrip.column("vals").unwrap();
    assert_eq!(stored.values()[0], Scalar::Float64(1.0));
    assert!(stored.values()[1].is_missing());
    assert_eq!(stored.values()[2], Scalar::Float64(3.0));
}
20288
/// Writes a boolean column to SQL and reads it back; the first two values are
/// expected to come back as the integers 1 and 0.
#[cfg(feature = "sql-sqlite")]
#[test]
fn sql_bool_roundtrip() {
    let flags = Column::new(
        DType::Bool,
        vec![Scalar::Bool(true), Scalar::Bool(false), Scalar::Bool(true)],
    )
    .unwrap();
    let columns = BTreeMap::from([("flags".to_string(), flags)]);
    let labels: Vec<IndexLabel> = (0..3).map(IndexLabel::Int64).collect();
    let frame = DataFrame::new_with_column_order(
        Index::new(labels),
        columns,
        vec!["flags".to_string()],
    )
    .unwrap();

    let conn = make_sql_test_conn();
    write_sql(&frame, &conn, "booltest", SqlIfExists::Fail).unwrap();
    let roundtrip = read_sql_table(&conn, "booltest").unwrap();

    let stored = roundtrip.column("flags").unwrap();
    assert_eq!(stored.values()[0], Scalar::Int64(1));
    assert_eq!(stored.values()[1], Scalar::Int64(0));
}
20330
/// A table name carrying an injection payload must be refused with an
/// `IoError::Sql` containing "invalid table name".
#[cfg(feature = "sql-sqlite")]
#[test]
fn sql_invalid_table_name_rejected() {
    let conn = make_sql_test_conn();
    let outcome = read_sql_table(&conn, "Robert'; DROP TABLE students; --");
    assert!(outcome.is_err());
    let err = outcome.unwrap_err();
    assert!(matches!(&err, IoError::Sql(msg) if msg.contains("invalid table name")));
}
20341
/// An empty table name must be rejected on both the read path (with an
/// "invalid table name" SQL error) and the write path (any error).
#[cfg(feature = "sql-sqlite")]
#[test]
fn sql_empty_table_name_rejected() {
    let conn = make_sql_test_conn();

    // Read path.
    let read_outcome = read_sql_table(&conn, "");
    assert!(read_outcome.is_err());
    let read_err = read_outcome.unwrap_err();
    assert!(matches!(&read_err, IoError::Sql(msg) if msg.contains("invalid table name")));

    // Write path.
    let frame = make_test_dataframe();
    let write_outcome = write_sql(&frame, &conn, "", SqlIfExists::Fail);
    assert!(write_outcome.is_err());
}
20356
/// Column dtypes must follow the declared SQL types both for a table with no
/// rows and for a table whose only row is entirely NULL.
#[cfg(feature = "sql-sqlite")]
#[test]
fn sql_empty_result() {
    let conn = make_sql_test_conn();

    // Table with zero rows.
    conn.execute_batch("CREATE TABLE empty (x INTEGER, y TEXT)")
        .unwrap();
    let frame = read_sql_table(&conn, "empty").unwrap();
    assert_eq!(frame.index().len(), 0);
    assert_eq!(frame.column_names().len(), 2);
    for (name, expected) in [("x", DType::Int64), ("y", DType::Utf8)] {
        assert_eq!(frame.column(name).unwrap().dtype(), expected);
    }

    // Table with a single all-NULL row.
    conn.execute_batch(
        "CREATE TABLE typed_nulls (i INTEGER, r REAL, t TEXT);
         INSERT INTO typed_nulls VALUES (NULL, NULL, NULL);",
    )
    .unwrap();
    let null_frame = read_sql_table(&conn, "typed_nulls").unwrap();
    assert_eq!(null_frame.index().len(), 1);
    for (name, expected) in [
        ("i", DType::Int64),
        ("r", DType::Float64),
        ("t", DType::Utf8),
    ] {
        assert_eq!(null_frame.column(name).unwrap().dtype(), expected);
    }
}
20380
/// A parameterized query whose filter matches nothing must still return all
/// three columns, in order, with dtypes matching the declared SQL types.
#[cfg(feature = "sql-sqlite")]
#[test]
fn sql_empty_filtered_query_preserves_declared_dtypes() {
    let conn = make_sql_test_conn();
    conn.execute_batch(
        "CREATE TABLE filtered_empty (i INTEGER, r REAL, t TEXT);
         INSERT INTO filtered_empty VALUES (1, 1.25, 'kept');",
    )
    .unwrap();

    let options = SqlReadOptions {
        params: Some(vec![Scalar::Int64(10)]),
        ..SqlReadOptions::default()
    };
    let frame = read_sql_with_options(
        &conn,
        "SELECT i, r, t FROM filtered_empty WHERE i > ?",
        &options,
    )
    .expect("empty filtered query must preserve cursor dtype hints");

    assert_eq!(frame.index().len(), 0);
    assert_eq!(frame.column_names(), vec!["i", "r", "t"]);
    for (name, expected) in [
        ("i", DType::Int64),
        ("r", DType::Float64),
        ("t", DType::Utf8),
    ] {
        assert_eq!(frame.column(name).unwrap().dtype(), expected);
    }
}
20407
/// Calls `to_sql` and `to_sql_with_options` through the `DataFrameIoExt`
/// extension trait and checks that each target table reads back with three
/// rows.
#[cfg(feature = "sql-sqlite")]
#[test]
fn sql_extension_trait() {
    use super::DataFrameIoExt;

    let conn = make_sql_test_conn();
    let frame = make_test_dataframe();

    frame.to_sql(&conn, "ext_test", SqlIfExists::Fail).unwrap();

    let write_options = SqlWriteOptions {
        if_exists: SqlIfExists::Fail,
        index: false,
        index_label: None,
        schema: None,
        dtype: None,
        method: SqlInsertMethod::Single,
        chunksize: None,
    };
    frame
        .to_sql_with_options(&conn, "ext_test_options", &write_options)
        .unwrap();

    let plain = read_sql_table(&conn, "ext_test").unwrap();
    assert_eq!(plain.index().len(), 3);
    let with_options = read_sql_table(&conn, "ext_test_options").unwrap();
    assert_eq!(with_options.index().len(), 3);
}
20438
/// Writes a series through `SeriesIoExt`: the plain `to_sql` call is expected
/// to store an `index` column next to the values, while `index: false` in the
/// write options stores the values column alone.
#[cfg(feature = "sql-sqlite")]
#[test]
fn series_sql_extension_aliases_roundtrip_to_single_column_table() {
    use super::SeriesIoExt;

    let source = Series::from_values(
        "sales",
        vec!["r1".into(), "r2".into()],
        vec![Scalar::Int64(10), Scalar::Int64(12)],
    )
    .expect("source series");
    let conn = make_sql_test_conn();

    // Plain write: index labels are stored as their own column.
    source
        .to_sql(&conn, "series_ext", SqlIfExists::Fail)
        .expect("series to_sql");
    let roundtrip = read_sql_table(&conn, "series_ext").expect("read series table");
    assert_eq!(roundtrip.column_names(), vec!["index", "sales"]);
    let stored_index = roundtrip.column("index").expect("index column");
    assert_eq!(
        stored_index.values(),
        &[Scalar::Utf8("r1".into()), Scalar::Utf8("r2".into())]
    );
    let stored_sales = roundtrip.column("sales").expect("sales column");
    assert_eq!(stored_sales.values(), source.values());

    // Write with the index suppressed.
    let write_options = SqlWriteOptions {
        if_exists: SqlIfExists::Fail,
        index: false,
        index_label: None,
        schema: None,
        dtype: None,
        method: SqlInsertMethod::Single,
        chunksize: None,
    };
    source
        .to_sql_with_options(&conn, "series_ext_no_index", &write_options)
        .expect("series to_sql index false");
    let no_index =
        read_sql_table(&conn, "series_ext_no_index").expect("read no-index series table");
    assert_eq!(no_index.column_names(), vec!["sales"]);
    let bare_sales = no_index.column("sales").expect("sales column");
    assert_eq!(bare_sales.values(), source.values());
}
20489
/// Serializes the shared test frame to Feather bytes and back, spot-checking
/// the row count and selected values of the `ints`, `floats`, and `names`
/// columns.
#[test]
fn feather_bytes_roundtrip() {
    let frame = make_test_dataframe();

    let bytes = super::write_feather_bytes(&frame).expect("write feather");
    assert!(!bytes.is_empty());

    let decoded = super::read_feather_bytes(&bytes).expect("read feather");
    assert_eq!(decoded.index().len(), 3);

    let ints = decoded.column("ints").unwrap();
    for (position, expected) in [10, 20, 30].into_iter().enumerate() {
        assert_eq!(ints.values()[position], Scalar::Int64(expected));
    }

    let floats = decoded.column("floats").unwrap();
    assert_eq!(floats.values()[0], Scalar::Float64(1.5));
    assert_eq!(floats.values()[2], Scalar::Float64(3.5));

    let names = decoded.column("names").unwrap();
    assert_eq!(names.values()[0], Scalar::Utf8("alice".into()));
    assert_eq!(names.values()[2], Scalar::Utf8("carol".into()));
}
20515
/// A frame with a row multi-index must round-trip through Feather bytes equal
/// to the source, without a leftover `__index_level_0__` data column, and with
/// matching labels on level 1 of the restored multi-index.
#[test]
fn feather_row_multiindex_roundtrip_restores_logical_row_axis() {
    let frame = make_row_multiindex_test_dataframe();
    let bytes = super::write_feather_bytes(&frame).expect("write feather");
    let roundtrip = super::read_feather_bytes(&bytes).expect("read feather");

    assert!(roundtrip.equals(&frame));
    assert!(roundtrip.column("__index_level_0__").is_none());

    let restored = roundtrip
        .row_multiindex()
        .expect("row multiindex should be restored");
    let restored_level = restored.get_level_values(1).unwrap();
    let source = frame.row_multiindex().expect("source row multiindex");
    let source_level = source.get_level_values(1).unwrap();
    assert_eq!(restored_level.labels(), source_level.labels());
}
20539
/// Writes the shared test frame to a Feather file in the system temp
/// directory, reads it back, and spot-checks the row count and first `ints`
/// value.
///
/// The file name embeds the process id so that concurrent test processes
/// sharing one temp directory do not overwrite each other's file, and a drop
/// guard removes the file even when an `expect` or assertion below panics.
#[test]
fn feather_file_roundtrip() {
    /// Deletes the wrapped path when dropped, including during unwinding.
    struct RemoveOnDrop(std::path::PathBuf);

    impl Drop for RemoveOnDrop {
        fn drop(&mut self) {
            // Best-effort cleanup: the file may not exist if the write failed.
            std::fs::remove_file(&self.0).ok();
        }
    }

    let frame = make_test_dataframe();
    let file_name = format!(
        "fp_io_test_feather_roundtrip_{}.feather",
        std::process::id()
    );
    let guard = RemoveOnDrop(std::env::temp_dir().join(file_name));
    let path = &guard.0;

    super::write_feather(&frame, path).expect("write feather file");
    let frame2 = super::read_feather(path).expect("read feather file");
    assert_eq!(frame2.index().len(), 3);
    assert_eq!(
        frame2.column("ints").unwrap().values()[0],
        Scalar::Int64(10)
    );
}
20555
/// Serializes the shared test frame to an Arrow IPC stream and back,
/// spot-checking the row count and the second value of `ints` and `names`.
#[test]
fn ipc_stream_bytes_roundtrip() {
    let frame = make_test_dataframe();

    let bytes = super::write_ipc_stream_bytes(&frame).expect("write ipc stream");
    assert!(!bytes.is_empty());

    let decoded = super::read_ipc_stream_bytes(&bytes).expect("read ipc stream");
    assert_eq!(decoded.index().len(), 3);

    let ints = decoded.column("ints").unwrap();
    assert_eq!(ints.values()[1], Scalar::Int64(20));
    let names = decoded.column("names").unwrap();
    assert_eq!(names.values()[1], Scalar::Utf8("bob".into()));
}
20573
/// A frame with a row multi-index must round-trip through an IPC stream equal
/// to the source and still report a row multi-index.
#[test]
fn ipc_stream_row_multiindex_roundtrip_restores_logical_row_axis() {
    let frame = make_row_multiindex_test_dataframe();

    let encoded = super::write_ipc_stream_bytes(&frame).expect("write ipc stream");
    let roundtrip = super::read_ipc_stream_bytes(&encoded).expect("read ipc stream");

    assert!(roundtrip.equals(&frame));
    assert!(roundtrip.row_multiindex().is_some());
}
20583
/// Round-trips a float column containing a NaN-kind null through Feather
/// bytes: the surrounding values must survive and the middle entry must read
/// back as missing.
#[test]
fn feather_with_nulls() {
    let vals = Column::new(
        DType::Float64,
        vec![
            Scalar::Float64(1.0),
            Scalar::Null(NullKind::NaN),
            Scalar::Float64(3.0),
        ],
    )
    .unwrap();
    let columns = BTreeMap::from([("vals".to_string(), vals)]);
    let labels: Vec<IndexLabel> = (0..3).map(IndexLabel::Int64).collect();
    let frame =
        DataFrame::new_with_column_order(Index::new(labels), columns, vec!["vals".to_string()])
            .unwrap();

    let bytes = super::write_feather_bytes(&frame).expect("write");
    let roundtrip = super::read_feather_bytes(&bytes).expect("read");

    let stored = roundtrip.column("vals").unwrap();
    assert_eq!(stored.values()[0], Scalar::Float64(1.0));
    assert!(stored.values()[1].is_missing());
    assert_eq!(stored.values()[2], Scalar::Float64(3.0));
}
20624
/// An integer column with a null in the middle must come back from Feather
/// bytes still typed `Int64`, with the null preserved as `NullKind::Null`.
#[test]
fn feather_nullable_int_roundtrip_preserves_int_dtype() {
    let source = Column::new(
        DType::Int64,
        vec![
            Scalar::Int64(10),
            Scalar::Null(NullKind::Null),
            Scalar::Int64(30),
        ],
    )
    .unwrap();
    let columns = BTreeMap::from([("vals".to_string(), source)]);
    let labels: Vec<IndexLabel> = (0..3).map(IndexLabel::Int64).collect();
    let frame =
        DataFrame::new_with_column_order(Index::new(labels), columns, vec!["vals".to_string()])
            .unwrap();

    let bytes = super::write_feather_bytes(&frame).expect("write");
    let roundtrip = super::read_feather_bytes(&bytes).expect("read");

    let vals = roundtrip.column("vals").unwrap();
    assert_eq!(vals.dtype(), DType::Int64);
    assert_eq!(vals.values()[0], Scalar::Int64(10));
    assert_eq!(vals.values()[1], Scalar::Null(NullKind::Null));
    assert_eq!(vals.values()[2], Scalar::Int64(30));
}
20661
/// Encodes a nullable integer series to an Arrow array and decodes it again:
/// the Arrow side must be an `Int64Array` with a null in slot 1, and the
/// decoded series must match the source in name, index labels, dtype, and
/// values.
#[test]
fn series_arrow_array_nullable_int_roundtrip() {
    let index_labels = vec![
        IndexLabel::Utf8("r0".into()),
        IndexLabel::Utf8("r1".into()),
        IndexLabel::Utf8("r2".into()),
    ];
    let values = vec![
        Scalar::Int64(10),
        Scalar::Null(NullKind::Null),
        Scalar::Int64(30),
    ];
    let series = Series::from_values("vals", index_labels, values).unwrap();

    // Encode and inspect the Arrow representation.
    let (dt, arr) = super::series_to_arrow_array(&series).expect("arrow encode");
    assert_eq!(dt, ArrowDataType::Int64);

    let typed = arr
        .as_any()
        .downcast_ref::<Int64Array>()
        .expect("int64 arrow array");
    assert_eq!(typed.value(0), 10);
    assert!(typed.is_null(1));
    assert_eq!(typed.value(2), 30);

    // Decode back into a series, reusing the source name and labels.
    let source_labels = series.index().labels().to_vec();
    let roundtrip =
        super::series_from_arrow_array(series.name(), source_labels, arr.as_ref(), &dt)
            .expect("arrow decode");

    assert_eq!(roundtrip.name(), "vals");
    assert_eq!(roundtrip.index().labels(), series.index().labels());
    assert_eq!(roundtrip.column().dtype(), DType::Int64);
    assert_eq!(roundtrip.values(), series.values());
}
20703
/// A boolean column must come back from Feather bytes as booleans; the first
/// two values are checked.
#[test]
fn feather_bool_column() {
    let flags = Column::new(
        DType::Bool,
        vec![Scalar::Bool(true), Scalar::Bool(false), Scalar::Bool(true)],
    )
    .unwrap();
    let columns = BTreeMap::from([("flags".to_string(), flags)]);
    let labels: Vec<IndexLabel> = (0..3).map(IndexLabel::Int64).collect();
    let frame = DataFrame::new_with_column_order(
        Index::new(labels),
        columns,
        vec!["flags".to_string()],
    )
    .unwrap();

    let bytes = super::write_feather_bytes(&frame).expect("write");
    let roundtrip = super::read_feather_bytes(&bytes).expect("read");

    let stored = roundtrip.column("flags").unwrap();
    assert_eq!(stored.values()[0], Scalar::Bool(true));
    assert_eq!(stored.values()[1], Scalar::Bool(false));
}
20742
20743 #[test]
20744 fn feather_preserves_column_order() {
20745 let frame = make_test_dataframe();
20746 let bytes = super::write_feather_bytes(&frame).expect("write");
20747 let frame2 = super::read_feather_bytes(&bytes).expect("read");
20748
20749 assert_eq!(
20750 frame2
20751 .column_names()
20752 .iter()
20753 .map(|s| s.as_str())
20754 .collect::<Vec<_>>(),
20755 frame
20756 .column_names()
20757 .iter()
20758 .map(|s| s.as_str())
20759 .collect::<Vec<_>>()
20760 );
20761 }
20762
20763 #[test]
20764 fn feather_extension_trait() {
20765 use super::DataFrameIoExt;
20766
20767 let frame = make_test_dataframe();
20768 let bytes = frame.to_feather_bytes().unwrap();
20769 let frame2 = super::read_feather_bytes(&bytes).unwrap();
20770 assert_eq!(frame2.index().len(), 3);
20771 }
20772
20773 #[test]
20778 fn csv_nrows_limits_rows() {
20779 let input = "x\n1\n2\n3\n4\n5\n";
20780 let opts = CsvReadOptions {
20781 nrows: Some(3),
20782 ..Default::default()
20783 };
20784 let frame = read_csv_with_options(input, &opts).expect("parse");
20785 assert_eq!(frame.index().len(), 3);
20786 assert_eq!(frame.column("x").unwrap().values()[2], Scalar::Int64(3));
20787 }
20788
20789 #[test]
20790 fn csv_skiprows_skips_data_rows() {
20791 let input = "x\n1\n2\n3\n4\n5\n";
20792 let opts = CsvReadOptions {
20793 skiprows: 2,
20794 ..Default::default()
20795 };
20796 let frame = read_csv_with_options(input, &opts).expect("parse");
20797 assert_eq!(frame.index().len(), 3); assert_eq!(frame.column("2").unwrap().values()[0], Scalar::Int64(3));
20799 }
20800
20801 #[test]
20802 fn csv_skiprows_and_nrows_combined() {
20803 let input = "x\n1\n2\n3\n4\n5\n";
20804 let opts = CsvReadOptions {
20805 skiprows: 1,
20806 nrows: Some(2),
20807 ..Default::default()
20808 };
20809 let frame = read_csv_with_options(input, &opts).expect("parse");
20810 assert_eq!(frame.index().len(), 2); assert_eq!(frame.column("1").unwrap().values()[0], Scalar::Int64(2));
20812 assert_eq!(frame.column("1").unwrap().values()[1], Scalar::Int64(3));
20813 }
20814
20815 #[test]
20816 fn csv_usecols_selects_columns() {
20817 let input = "a,b,c\n1,2,3\n4,5,6\n";
20818 let opts = CsvReadOptions {
20819 usecols: Some(vec!["a".into(), "c".into()]),
20820 ..Default::default()
20821 };
20822 let frame = read_csv_with_options(input, &opts).expect("parse");
20823 assert_eq!(frame.column_names().len(), 2);
20824 assert!(frame.column("a").is_some());
20825 assert!(frame.column("b").is_none());
20826 assert!(frame.column("c").is_some());
20827 assert_eq!(frame.column("a").unwrap().values()[0], Scalar::Int64(1));
20828 assert_eq!(frame.column("c").unwrap().values()[1], Scalar::Int64(6));
20829 }
20830
20831 #[test]
20832 fn csv_usecols_nonexistent_column_errors() {
20833 let input = "a,b\n1,2\n";
20834 let opts = CsvReadOptions {
20835 usecols: Some(vec!["a".into(), "nonexistent".into()]),
20836 ..Default::default()
20837 };
20838 let err = read_csv_with_options(input, &opts).expect_err("missing usecols should error");
20839 assert!(matches!(err, IoError::MissingUsecols(_)));
20840 }
20841
20842 #[test]
20843 fn csv_dtype_coercion() {
20844 let input = "id,score\n1,95\n2,87\n";
20845 let mut dtype_map = std::collections::HashMap::new();
20846 dtype_map.insert("score".to_owned(), fp_types::DType::Float64);
20847 let opts = CsvReadOptions {
20848 dtype: Some(dtype_map),
20849 ..Default::default()
20850 };
20851 let frame = read_csv_with_options(input, &opts).expect("parse");
20852 assert_eq!(
20854 frame.column("score").unwrap().values()[0],
20855 Scalar::Float64(95.0)
20856 );
20857 assert_eq!(
20858 frame.column("score").unwrap().values()[1],
20859 Scalar::Float64(87.0)
20860 );
20861 assert_eq!(frame.column("id").unwrap().values()[0], Scalar::Int64(1));
20863 }
20864
20865 #[test]
20866 fn csv_dtype_coercion_invalid_value_errors() {
20867 let input = "id,score\n1,abc\n";
20868 let mut dtype_map = std::collections::HashMap::new();
20869 dtype_map.insert("score".to_owned(), fp_types::DType::Int64);
20870 let opts = CsvReadOptions {
20871 dtype: Some(dtype_map),
20872 ..Default::default()
20873 };
20874 let err = read_csv_with_options(input, &opts).expect_err("invalid cast must error");
20875 assert!(matches!(
20876 err,
20877 IoError::Column(fp_columnar::ColumnError::Type(
20878 fp_types::TypeError::InvalidCast { .. }
20879 ))
20880 ));
20881 }
20882
20883 #[test]
20884 fn csv_skiprows_beyond_data_errors() {
20885 let input = "x\n1\n2\n";
20886 let opts = CsvReadOptions {
20887 skiprows: 100,
20888 ..Default::default()
20889 };
20890 let err = read_csv_with_options(input, &opts).expect_err("skiprows removes header");
20891 assert!(matches!(err, IoError::MissingHeaders));
20892 }
20893
20894 #[test]
20895 fn csv_nrows_zero_returns_empty() {
20896 let input = "x\n1\n2\n3\n";
20897 let opts = CsvReadOptions {
20898 nrows: Some(0),
20899 ..Default::default()
20900 };
20901 let frame = read_csv_with_options(input, &opts).expect("parse");
20902 assert_eq!(frame.index().len(), 0);
20903 }
20904
20905 #[test]
20906 fn csv_decimal_comma_parses_quoted_float_fields() {
20907 let input = "price\n\"1,50\"\n\"3,75\"\n";
20908 let opts = CsvReadOptions {
20909 decimal: b',',
20910 ..Default::default()
20911 };
20912 let frame = read_csv_with_options(input, &opts).expect("parse");
20913 assert_eq!(
20914 frame.column("price").unwrap().values(),
20915 &[Scalar::Float64(1.5), Scalar::Float64(3.75)]
20916 );
20917 }
20918
20919 #[test]
20920 fn csv_default_decimal_keeps_comma_decimal_strings_as_utf8() {
20921 let input = "price\n\"1,50\"\n";
20922 let frame = read_csv_with_options(input, &CsvReadOptions::default()).expect("parse");
20923 assert_eq!(
20924 frame.column("price").unwrap().values(),
20925 &[Scalar::Utf8("1,50".to_owned())]
20926 );
20927 }
20928
20929 #[test]
20930 fn csv_true_false_values_do_not_override_numeric_inference() {
20931 let input = "flag\n1\n0\n";
20932 let opts = CsvReadOptions {
20933 true_values: vec!["1".to_owned()],
20934 false_values: vec!["0".to_owned()],
20935 ..Default::default()
20936 };
20937 let frame = read_csv_with_options(input, &opts).expect("parse");
20938 assert_eq!(
20939 frame.column("flag").unwrap().values(),
20940 &[Scalar::Int64(1), Scalar::Int64(0)]
20941 );
20942 }
20943
20944 #[test]
20945 fn csv_true_false_values_convert_non_numeric_tokens() {
20946 let input = "flag\nyes\nno\n";
20947 let opts = CsvReadOptions {
20948 true_values: vec!["yes".to_owned()],
20949 false_values: vec!["no".to_owned()],
20950 ..Default::default()
20951 };
20952 let frame = read_csv_with_options(input, &opts).expect("parse");
20953 assert_eq!(
20954 frame.column("flag").unwrap().values(),
20955 &[Scalar::Bool(true), Scalar::Bool(false)]
20956 );
20957 }
20958
20959 #[test]
20960 fn csv_default_parsing_keeps_numeric_boolean_tokens_as_ints() {
20961 let input = "flag\n1\n0\n";
20962 let frame = read_csv_with_options(input, &CsvReadOptions::default()).expect("parse");
20963 assert_eq!(
20964 frame.column("flag").unwrap().values(),
20965 &[Scalar::Int64(1), Scalar::Int64(0)]
20966 );
20967 }
20968
20969 #[test]
20970 fn csv_missing_numeric_column_preserves_int() {
20971 let input = "a,b,c\n,NA,NaN\n1,,x\n";
20973 let frame = read_csv_with_options(input, &CsvReadOptions::default()).expect("parse");
20974 assert_eq!(
20975 frame.column("a").unwrap().values(),
20976 &[Scalar::Null(NullKind::Null), Scalar::Int64(1)]
20977 );
20978 assert!(frame.column("b").unwrap().values()[0].is_missing());
20979 assert!(frame.column("b").unwrap().values()[1].is_missing());
20980 assert_eq!(
20981 frame.column("c").unwrap().values(),
20982 &[Scalar::Null(NullKind::Null), Scalar::Utf8("x".to_owned())]
20983 );
20984 }
20985
20986 #[test]
20987 fn csv_parse_dates_mixed_naive_and_aware_strings_normalizes_per_value() {
20988 let input = "ts,value\n2024-01-15 10:30:00,1\n2024-01-15T10:30:00Z,2\n";
20997 let opts = CsvReadOptions {
20998 parse_dates: Some(vec!["ts".to_owned()]),
20999 ..Default::default()
21000 };
21001 let frame = read_csv_with_options(input, &opts).expect("parse");
21002 assert_eq!(
21003 frame.column("ts").unwrap().values(),
21004 &[
21005 Scalar::Utf8("2024-01-15 10:30:00".to_owned()),
21006 Scalar::Utf8("2024-01-15 10:30:00+00:00".to_owned()),
21007 ]
21008 );
21009 assert_eq!(
21010 frame.column("value").unwrap().values(),
21011 &[Scalar::Int64(1), Scalar::Int64(2)]
21012 );
21013 }
21014
21015 #[test]
21016 fn csv_parse_dates_combined_columns_replaces_source_columns() {
21017 let input = "date,time,value\n2024-01-15,10:30:00,1\n2024-01-16,11:45:30,2\n";
21018 let opts = CsvReadOptions {
21019 parse_date_combinations: Some(vec![vec!["date".to_owned(), "time".to_owned()]]),
21020 ..Default::default()
21021 };
21022 let frame = read_csv_with_options(input, &opts).expect("parse");
21023 assert_eq!(frame.column_names(), vec!["date_time", "value"]);
21024 assert_eq!(
21025 frame.column("date_time").unwrap().values(),
21026 &[
21027 Scalar::Utf8("2024-01-15 10:30:00".to_owned()),
21028 Scalar::Utf8("2024-01-16 11:45:30".to_owned()),
21029 ]
21030 );
21031 assert!(frame.column("date").is_none());
21032 assert!(frame.column("time").is_none());
21033 assert_eq!(
21034 frame.column("value").unwrap().values(),
21035 &[Scalar::Int64(1), Scalar::Int64(2)]
21036 );
21037 }
21038
21039 #[test]
21040 fn csv_parse_date_combinations_named_uses_caller_supplied_name() {
21041 let input = "date,time,value\n2024-01-15,10:30:00,1\n2024-01-16,11:45:30,2\n";
21042 let opts = CsvReadOptions {
21043 parse_date_combinations_named: Some(vec![(
21044 "timestamp".to_owned(),
21045 vec!["date".to_owned(), "time".to_owned()],
21046 )]),
21047 ..Default::default()
21048 };
21049 let frame = read_csv_with_options(input, &opts).expect("parse");
21050 assert_eq!(frame.column_names(), vec!["timestamp", "value"]);
21053 assert!(frame.column("date").is_none());
21054 assert!(frame.column("time").is_none());
21055 assert_eq!(
21056 frame.column("timestamp").unwrap().values(),
21057 &[
21058 Scalar::Utf8("2024-01-15 10:30:00".to_owned()),
21059 Scalar::Utf8("2024-01-16 11:45:30".to_owned()),
21060 ]
21061 );
21062 }
21063
21064 #[test]
21065 fn csv_parse_date_combinations_named_multiple_groups() {
21066 let input = "d1,t1,d2,t2,value\n2024-01-01,09:00:00,2024-01-01,17:00:00,10\n2024-02-01,09:00:00,2024-02-01,17:00:00,20\n";
21067 let opts = CsvReadOptions {
21068 parse_date_combinations_named: Some(vec![
21069 ("start".to_owned(), vec!["d1".to_owned(), "t1".to_owned()]),
21070 ("end".to_owned(), vec!["d2".to_owned(), "t2".to_owned()]),
21071 ]),
21072 ..Default::default()
21073 };
21074 let frame = read_csv_with_options(input, &opts).expect("parse");
21075 let names = frame.column_names();
21076 assert!(names.iter().any(|n| n.as_str() == "start"));
21077 assert!(names.iter().any(|n| n.as_str() == "end"));
21078 assert!(!names.iter().any(|n| n.as_str() == "d1"));
21079 assert!(!names.iter().any(|n| n.as_str() == "t2"));
21080 assert_eq!(
21081 frame.column("value").unwrap().values(),
21082 &[Scalar::Int64(10), Scalar::Int64(20)]
21083 );
21084 assert_eq!(
21085 frame.column("start").unwrap().values(),
21086 &[
21087 Scalar::Utf8("2024-01-01 09:00:00".to_owned()),
21088 Scalar::Utf8("2024-02-01 09:00:00".to_owned()),
21089 ]
21090 );
21091 }
21092
21093 #[test]
21094 fn csv_parse_date_combinations_named_rejects_duplicate_output_names() {
21095 let input = "a,b,c,d\n2024,01,2024,02\n";
21096 let opts = CsvReadOptions {
21097 parse_date_combinations_named: Some(vec![
21098 ("ts".to_owned(), vec!["a".to_owned(), "b".to_owned()]),
21099 ("ts".to_owned(), vec!["c".to_owned(), "d".to_owned()]),
21100 ]),
21101 ..Default::default()
21102 };
21103 let err = read_csv_with_options(input, &opts).unwrap_err();
21104 assert!(matches!(err, IoError::DuplicateColumnName(name) if name == "ts"));
21105 }
21106
21107 #[test]
21108 fn csv_parse_date_combinations_named_rejects_missing_source_column() {
21109 let input = "date,time,value\n2024-01-01,09:00:00,1\n";
21110 let opts = CsvReadOptions {
21111 parse_date_combinations_named: Some(vec![(
21112 "ts".to_owned(),
21113 vec!["date".to_owned(), "missing".to_owned()],
21114 )]),
21115 ..Default::default()
21116 };
21117 let err = read_csv_with_options(input, &opts).unwrap_err();
21118 assert!(matches!(err, IoError::MissingParseDateColumns(_)));
21119 }
21120
21121 #[test]
21122 fn csv_parse_date_combinations_named_empty_sources_skipped() {
21123 let input = "a,b\n1,2\n";
21124 let opts = CsvReadOptions {
21125 parse_date_combinations_named: Some(vec![("unused".to_owned(), Vec::new())]),
21126 ..Default::default()
21127 };
21128 let frame = read_csv_with_options(input, &opts).expect("parse");
21129 assert_eq!(frame.column_names(), vec!["a", "b"]);
21131 }
21132
21133 #[test]
21136 fn jsonl_write_read_roundtrip() {
21137 let frame = make_test_dataframe();
21138 let jsonl = super::write_jsonl_string(&frame).expect("JSONL write failed");
21139
21140 let line_count = jsonl.lines().count();
21142 assert_eq!(line_count, 3, "3 rows = 3 lines");
21143
21144 let back = super::read_jsonl_str(&jsonl).expect("JSONL read failed");
21145 assert_eq!(back.index().len(), 3);
21146 assert_eq!(back.column("ints").unwrap().values()[0], Scalar::Int64(10));
21147 assert_eq!(
21148 back.column("names").unwrap().values()[2],
21149 Scalar::Utf8("carol".into())
21150 );
21151 }
21152
21153 #[test]
21154 fn jsonl_preserves_column_order() {
21155 let input = r#"
21156{"b":1,"a":2}
21157{"c":3}
21158"#;
21159 let frame = super::read_jsonl_str(input).expect("JSONL read failed");
21160 let order: Vec<&str> = frame
21161 .column_names()
21162 .iter()
21163 .map(|name| name.as_str())
21164 .collect();
21165 assert_eq!(order, vec!["b", "a", "c"]);
21166 }
21167
21168 #[test]
21169 fn jsonl_each_line_is_valid_json() {
21170 let frame = make_test_dataframe();
21171 let jsonl = super::write_jsonl_string(&frame).unwrap();
21172
21173 for (i, line) in jsonl.lines().enumerate() {
21174 let parsed: serde_json::Value =
21175 serde_json::from_str(line).expect("jsonl line must be valid JSON");
21176 assert!(parsed.is_object(), "line {i} must be a JSON object");
21177 }
21178 }
21179
21180 #[test]
21181 fn jsonl_with_nulls() {
21182 use fp_types::DType;
21183
21184 let mut columns = BTreeMap::new();
21185 columns.insert(
21186 "v".to_string(),
21187 Column::new(
21188 DType::Float64,
21189 vec![
21190 Scalar::Float64(1.0),
21191 Scalar::Null(NullKind::NaN),
21192 Scalar::Float64(3.0),
21193 ],
21194 )
21195 .unwrap(),
21196 );
21197 let labels = vec![
21198 IndexLabel::Int64(0),
21199 IndexLabel::Int64(1),
21200 IndexLabel::Int64(2),
21201 ];
21202 let frame =
21203 DataFrame::new_with_column_order(Index::new(labels), columns, vec!["v".to_string()])
21204 .unwrap();
21205
21206 let jsonl = super::write_jsonl_string(&frame).unwrap();
21207 let back = super::read_jsonl_str(&jsonl).unwrap();
21208 assert!(back.column("v").unwrap().values()[1].is_missing());
21209 }
21210
21211 #[test]
21212 fn jsonl_records_write_preserves_nullable_int_column() {
21213 let frame = DataFrame::from_dict(
21215 &["a"],
21216 vec![("a", vec![Scalar::Int64(1), Scalar::Null(NullKind::Null)])],
21217 )
21218 .unwrap();
21219
21220 let jsonl = super::write_jsonl_string(&frame).expect("write jsonl");
21221 let rows = jsonl
21222 .lines()
21223 .map(|line| serde_json::from_str::<serde_json::Value>(line).unwrap())
21224 .collect::<Vec<_>>();
21225
21226 assert_eq!(
21227 rows,
21228 vec![serde_json::json!({"a": 1}), serde_json::json!({"a": null})]
21229 );
21230 }
21231
21232 #[test]
21233 fn jsonl_empty_input() {
21234 let back = super::read_jsonl_str("").expect("empty JSONL must parse");
21235 assert_eq!(back.index().len(), 0);
21236 }
21237
21238 #[test]
21239 fn jsonl_blank_lines_skipped() {
21240 let input = "{\"a\":1}\n\n{\"a\":2}\n\n";
21241 let back = super::read_jsonl_str(input).expect("JSONL with blanks must parse");
21242 assert_eq!(back.index().len(), 2);
21243 }
21244
21245 #[test]
21246 fn jsonl_non_object_line_errors() {
21247 let input = "{\"a\":1}\n[1,2,3]\n";
21248 let err = super::read_jsonl_str(input);
21249 assert!(err.is_err());
21250 }
21251
21252 #[test]
21253 fn jsonl_different_keys_across_rows() {
21254 let input = "{\"a\":1,\"b\":2}\n{\"a\":3,\"c\":4}\n";
21256 let frame = super::read_jsonl_str(input).expect("JSONL with different keys must parse");
21257 assert_eq!(frame.index().len(), 2);
21258 assert!(frame.column("a").is_some(), "column a must exist");
21260 assert!(frame.column("b").is_some(), "column b must exist");
21261 assert!(frame.column("c").is_some(), "column c must exist");
21262 assert_eq!(frame.column("a").unwrap().values()[0], Scalar::Int64(1));
21264 assert_eq!(frame.column("b").unwrap().values()[0], Scalar::Float64(2.0));
21265 assert!(frame.column("c").unwrap().values()[0].is_missing());
21266 assert_eq!(frame.column("a").unwrap().values()[1], Scalar::Int64(3));
21268 assert!(frame.column("b").unwrap().values()[1].is_missing());
21269 assert_eq!(frame.column("c").unwrap().values()[1], Scalar::Float64(4.0));
21270 }
21271
21272 #[test]
21273 fn adversarial_csv_very_long_field() {
21274 let long_val = "x".repeat(200_000);
21276 let input = format!("col\n{long_val}\n");
21277 let frame = read_csv_str(&input).expect("long field must parse");
21278 assert_eq!(frame.index().len(), 1);
21279 match &frame.column("col").unwrap().values()[0] {
21280 Scalar::Utf8(s) => assert_eq!(s.len(), 200_000),
21281 other => assert!(
21282 matches!(other, Scalar::Utf8(_)),
21283 "expected Utf8 for long field"
21284 ),
21285 }
21286 }
21287
21288 #[test]
21289 fn adversarial_csv_many_columns() {
21290 let ncols = 1000;
21292 let headers: Vec<String> = (0..ncols).map(|i| format!("c{i}")).collect();
21293 let mut csv = headers.join(",");
21294 csv.push('\n');
21295 let vals: Vec<String> = (0..ncols).map(|i| i.to_string()).collect();
21296 csv.push_str(&vals.join(","));
21297 csv.push('\n');
21298
21299 let frame = read_csv_str(&csv).expect("1000-column CSV must parse");
21300 assert_eq!(frame.columns().len(), ncols);
21301 assert_eq!(frame.index().len(), 1);
21302 }
21303
21304 #[test]
21305 fn adversarial_csv_empty_rows() {
21306 let input = "a,b\n1,2\n,\n3,4\n";
21310 let frame = read_csv_str(input).expect("parse");
21311 assert_eq!(frame.index().len(), 3);
21312 assert!(frame.column("a").unwrap().values()[1].is_missing());
21314 }
21315
21316 #[test]
21317 fn adversarial_csv_field_with_newlines_in_quotes() {
21318 let input = "msg\n\"line1\nline2\nline3\"\n\"single\"\n";
21320 let frame = read_csv_str(input).expect("quoted newlines must parse");
21321 assert_eq!(frame.index().len(), 2);
21322 }
21323
21324 #[test]
21325 fn adversarial_csv_header_only_no_data() {
21326 let input = "x,y,z\n";
21327 let frame = read_csv_str(input).expect("header-only must parse");
21328 assert_eq!(frame.index().len(), 0);
21329 assert_eq!(frame.columns().len(), 3);
21330 }
21331
21332 #[test]
21333 fn adversarial_json_deeply_nested_values() {
21334 let input = r#"[{"a":1,"b":{"nested":"value"}}]"#;
21336 let frame = read_json_str(input, JsonOrient::Records).expect("nested JSON must parse");
21337 assert_eq!(frame.index().len(), 1);
21338 let b_val = &frame.column("b").unwrap().values()[0];
21340 assert!(matches!(b_val, Scalar::Utf8(_)));
21341 }
21342
21343 #[test]
21344 fn adversarial_json_i64_max_value() {
21345 let input = format!(r#"[{{"v":{}}}]"#, i64::MAX);
21347 let frame = read_json_str(&input, JsonOrient::Records).expect("i64::MAX must parse");
21348 assert_eq!(
21349 frame.column("v").unwrap().values()[0],
21350 Scalar::Int64(i64::MAX)
21351 );
21352 }
21353
21354 #[test]
21355 fn adversarial_json_i64_min_value() {
21356 let input = format!(r#"[{{"v":{}}}]"#, i64::MIN);
21357 let frame = read_json_str(&input, JsonOrient::Records).expect("i64::MIN must parse");
21358 assert_eq!(
21359 frame.column("v").unwrap().values()[0],
21360 Scalar::Int64(i64::MIN)
21361 );
21362 }
21363
21364 #[test]
21365 fn adversarial_json_float_special_values() {
21366 let input = r#"[{"v":null},{"v":1.7976931348623157e+308}]"#;
21369 let frame = read_json_str(input, JsonOrient::Records).expect("special floats must parse");
21370 assert!(frame.column("v").unwrap().values()[0].is_missing());
21371 if let Scalar::Float64(v) = frame.column("v").unwrap().values()[1] {
21373 assert!(v.is_finite());
21374 }
21375 }
21376
21377 #[test]
21378 fn adversarial_json_empty_records_array() {
21379 let input = r#"[]"#;
21380 let frame = read_json_str(input, JsonOrient::Records).expect("empty array must parse");
21381 assert_eq!(frame.index().len(), 0);
21382 }
21383
21384 #[test]
21385 fn adversarial_json_empty_columns_object() {
21386 let input = r#"{}"#;
21387 let frame = read_json_str(input, JsonOrient::Columns).expect("empty object must parse");
21388 assert_eq!(frame.index().len(), 0);
21389 assert_eq!(frame.columns().len(), 0);
21390 }
21391
21392 #[test]
21393 fn adversarial_csv_unicode_values() {
21394 let input = "name,emoji\n日本語,🎉\nрусский,🚀\n";
21396 let frame = read_csv_str(input).expect("unicode CSV must parse");
21397 assert_eq!(frame.index().len(), 2);
21398 assert_eq!(
21399 frame.column("name").unwrap().values()[0],
21400 Scalar::Utf8("日本語".into())
21401 );
21402 assert_eq!(
21403 frame.column("emoji").unwrap().values()[1],
21404 Scalar::Utf8("🚀".into())
21405 );
21406 }
21407
21408 #[test]
21409 fn adversarial_csv_single_column_no_trailing_newline() {
21410 let input = "val\n42";
21411 let frame = read_csv_str(input).expect("no trailing newline must parse");
21412 assert_eq!(frame.index().len(), 1);
21413 assert_eq!(frame.column("val").unwrap().values()[0], Scalar::Int64(42));
21414 }
21415
21416 #[cfg(feature = "sql-sqlite")]
21417 #[test]
21418 fn adversarial_sql_large_batch_insert() {
21419 let n = 10_000;
21421 let vals: Vec<Scalar> = (0..n).map(|i| Scalar::Int64(i as i64)).collect();
21422 let df = fp_frame::DataFrame::from_dict(&["x"], vec![("x", vals)]).unwrap();
21423
21424 let conn = make_sql_test_conn();
21425 write_sql(&df, &conn, "big_table", SqlIfExists::Fail).unwrap();
21426 let back = read_sql_table(&conn, "big_table").unwrap();
21427 assert_eq!(back.index().len(), n);
21428 assert_eq!(
21429 back.column("x").unwrap().values()[n - 1],
21430 Scalar::Int64((n - 1) as i64)
21431 );
21432 }
21433
21434 #[cfg(feature = "sql-sqlite")]
21435 #[test]
21436 fn adversarial_sql_column_name_with_spaces_accepted() {
21437 let df = fp_frame::DataFrame::from_dict(
21440 &["has space"],
21441 vec![("has space", vec![Scalar::Int64(1)])],
21442 )
21443 .unwrap();
21444
21445 let conn = make_sql_test_conn();
21446 let result = write_sql(&df, &conn, "test_spaces", SqlIfExists::Fail);
21448 assert!(
21449 result.is_ok(),
21450 "columns with spaces should work: {:?}",
21451 result.err()
21452 );
21453
21454 let back = read_sql_table(&conn, "test_spaces").unwrap();
21455 assert!(back.column("has space").is_some());
21456 }
21457
21458 #[cfg(feature = "sql-sqlite")]
21459 #[test]
21460 fn adversarial_sql_column_name_with_quotes_accepted() {
21461 let col_name = "has\"quote";
21462 let df =
21463 fp_frame::DataFrame::from_dict(&[col_name], vec![(col_name, vec![Scalar::Int64(7)])])
21464 .unwrap();
21465
21466 let conn = make_sql_test_conn();
21467 let result = write_sql(&df, &conn, "test_quotes", SqlIfExists::Fail);
21468 assert!(
21469 result.is_ok(),
21470 "columns with quotes should work: {:?}",
21471 result.err()
21472 );
21473
21474 let back = read_sql_table(&conn, "test_quotes").unwrap();
21475 assert_eq!(back.column(col_name).unwrap().values()[0], Scalar::Int64(7));
21476 }
21477
21478 #[cfg(feature = "sql-sqlite")]
21481 #[test]
21482 fn rusqlite_dialect_name_is_sqlite() {
21483 let conn = make_sql_test_conn();
21484 assert_eq!(super::SqlConnection::dialect_name(&conn), "sqlite");
21485 }
21486
21487 #[cfg(feature = "sql-sqlite")]
21488 #[test]
21489 fn rusqlite_supports_returning_is_true() {
21490 let conn = make_sql_test_conn();
21492 assert!(super::SqlConnection::supports_returning(&conn));
21493 }
21494
21495 #[cfg(feature = "sql-sqlite")]
21496 #[test]
21497 fn rusqlite_max_param_count_is_32766() {
21498 let conn = make_sql_test_conn();
21499 assert_eq!(super::SqlConnection::max_param_count(&conn), Some(32766));
21500 }
21501
21502 #[cfg(feature = "sql-sqlite")]
21503 #[test]
21504 fn rusqlite_with_transaction_commits_on_ok() {
21505 let conn = make_sql_test_conn();
21506 super::SqlConnection::execute_batch(&conn, "CREATE TABLE txn_test (x INTEGER)").unwrap();
21507 let result: Result<i64, IoError> = super::SqlConnection::with_transaction(&conn, |c| {
21508 super::SqlConnection::execute_batch(c, "INSERT INTO txn_test VALUES (42)")?;
21509 Ok(42)
21510 });
21511 assert_eq!(result.unwrap(), 42);
21512 let row_count =
21514 super::SqlConnection::query(&conn, "SELECT COUNT(*) FROM txn_test", &[]).unwrap();
21515 assert_eq!(row_count.rows.len(), 1);
21516 assert_eq!(row_count.rows[0][0], Scalar::Int64(1));
21517 }
21518
21519 #[cfg(feature = "sql-sqlite")]
21520 #[test]
21521 fn rusqlite_with_transaction_rolls_back_on_err() {
21522 let conn = make_sql_test_conn();
21523 super::SqlConnection::execute_batch(&conn, "CREATE TABLE txn_rollback (x INTEGER)")
21524 .unwrap();
21525 let result: Result<(), IoError> = super::SqlConnection::with_transaction(&conn, |c| {
21526 super::SqlConnection::execute_batch(c, "INSERT INTO txn_rollback VALUES (99)")?;
21527 Err(IoError::Sql("simulated failure".to_string()))
21528 });
21529 assert!(result.is_err());
21530 let row_count =
21532 super::SqlConnection::query(&conn, "SELECT COUNT(*) FROM txn_rollback", &[]).unwrap();
21533 assert_eq!(row_count.rows[0][0], Scalar::Int64(0));
21534 }
21535
    /// A panic raised inside the transaction closure must not leave the inserted row
    /// behind, and the same connection must still accept a later transaction.
    #[cfg(feature = "sql-sqlite")]
    #[test]
    fn rusqlite_with_transaction_rolls_back_on_panic() {
        let conn = make_sql_test_conn();
        super::SqlConnection::execute_batch(&conn, "CREATE TABLE txn_panic (x INTEGER)").unwrap();

        // Insert a row, then unwind out of the closure; `catch_unwind` captures the
        // panic so the test can inspect the connection afterwards.
        let panic_result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
            let _: Result<(), IoError> = super::SqlConnection::with_transaction(&conn, |c| {
                super::SqlConnection::execute_batch(c, "INSERT INTO txn_panic VALUES (99)")?;
                std::panic::resume_unwind(Box::new("simulated transaction panic"));
            });
        }));
        assert!(panic_result.is_err());

        // The row inserted before the panic must not be visible.
        let row_count =
            super::SqlConnection::query(&conn, "SELECT COUNT(*) FROM txn_panic", &[]).unwrap();
        assert_eq!(row_count.rows[0][0], Scalar::Int64(0));

        // A follow-up transaction on the same connection succeeds, and only its row
        // is present.
        let result: Result<(), IoError> = super::SqlConnection::with_transaction(&conn, |c| {
            super::SqlConnection::execute_batch(c, "INSERT INTO txn_panic VALUES (7)")
        });
        assert!(result.is_ok());
        let rows =
            super::SqlConnection::query(&conn, "SELECT x FROM txn_panic ORDER BY x", &[]).unwrap();
        assert_eq!(rows.rows, vec![vec![Scalar::Int64(7)]]);
    }
21562
21563 #[test]
21564 fn default_capability_probes_are_conservative() {
21565 struct StubSql;
21568 impl super::SqlConnection for StubSql {
21569 fn query(&self, _q: &str, _p: &[Scalar]) -> Result<super::SqlQueryResult, IoError> {
21570 Ok(super::SqlQueryResult {
21571 columns: vec![],
21572 rows: vec![],
21573 })
21574 }
21575 fn execute_batch(&self, _sql: &str) -> Result<(), IoError> {
21576 Ok(())
21577 }
21578 fn table_exists(&self, _name: &str) -> Result<bool, IoError> {
21579 Ok(false)
21580 }
21581 fn insert_rows(&self, _sql: &str, _rows: &[Vec<Scalar>]) -> Result<(), IoError> {
21582 Ok(())
21583 }
21584 fn dtype_sql(&self, _dtype: DType) -> &'static str {
21585 "TEXT"
21586 }
21587 fn index_dtype_sql(&self, _index: &Index) -> &'static str {
21588 "TEXT"
21589 }
21590 }
21591
21592 let stub = StubSql;
21593 assert_eq!(super::SqlConnection::dialect_name(&stub), "unknown");
21594 assert!(!super::SqlConnection::supports_returning(&stub));
21595 assert_eq!(super::SqlConnection::max_param_count(&stub), None);
21596 let result: Result<i64, IoError> = super::SqlConnection::with_transaction(&stub, |_| Ok(7));
21598 assert_eq!(result.unwrap(), 7);
21599 assert_eq!(
21601 super::SqlConnection::quote_identifier(&stub, "col").unwrap(),
21602 r#""col""#
21603 );
21604 }
21605
21606 #[cfg(feature = "sql-sqlite")]
21609 #[test]
21610 fn rusqlite_quote_identifier_uses_ansi_double_quotes() {
21611 let conn = make_sql_test_conn();
21612 assert_eq!(
21613 super::SqlConnection::quote_identifier(&conn, "users").unwrap(),
21614 r#""users""#
21615 );
21616 }
21617
21618 #[cfg(feature = "sql-sqlite")]
21619 #[test]
21620 fn rusqlite_quote_identifier_doubles_embedded_quotes() {
21621 let conn = make_sql_test_conn();
21622 assert_eq!(
21624 super::SqlConnection::quote_identifier(&conn, r#"value"raw"#).unwrap(),
21625 r#""value""raw""#
21626 );
21627 }
21628
21629 #[cfg(feature = "sql-sqlite")]
21630 #[test]
21631 fn rusqlite_quote_identifier_rejects_null_bytes() {
21632 let conn = make_sql_test_conn();
21633 let err = super::SqlConnection::quote_identifier(&conn, "evil\0name").expect_err("nul");
21634 assert!(matches!(err, IoError::Sql(_)));
21635 }
21636
21637 #[test]
21638 fn default_quote_identifier_doubles_embedded_quotes() {
21639 struct StubSql;
21642 impl super::SqlConnection for StubSql {
21643 fn query(&self, _q: &str, _p: &[Scalar]) -> Result<super::SqlQueryResult, IoError> {
21644 Ok(super::SqlQueryResult {
21645 columns: vec![],
21646 rows: vec![],
21647 })
21648 }
21649 fn execute_batch(&self, _sql: &str) -> Result<(), IoError> {
21650 Ok(())
21651 }
21652 fn table_exists(&self, _name: &str) -> Result<bool, IoError> {
21653 Ok(false)
21654 }
21655 fn insert_rows(&self, _sql: &str, _rows: &[Vec<Scalar>]) -> Result<(), IoError> {
21656 Ok(())
21657 }
21658 fn dtype_sql(&self, _dtype: DType) -> &'static str {
21659 "TEXT"
21660 }
21661 fn index_dtype_sql(&self, _index: &Index) -> &'static str {
21662 "TEXT"
21663 }
21664 }
21665 let stub = StubSql;
21666 assert_eq!(
21667 super::SqlConnection::quote_identifier(&stub, r#"value"raw"#).unwrap(),
21668 r#""value""raw""#
21669 );
21670 assert!(super::SqlConnection::quote_identifier(&stub, "evil\0").is_err());
21671 }
21672
21673 #[cfg(feature = "sql-sqlite")]
21676 #[test]
21677 fn read_sql_dtype_override_int_to_float() {
21678 let conn = make_sql_test_conn();
21679 super::SqlConnection::execute_batch(
21680 &conn,
21681 "CREATE TABLE amounts (amount INTEGER); INSERT INTO amounts VALUES (1), (2), (3);",
21682 )
21683 .unwrap();
21684 let mut dtype_map = BTreeMap::new();
21685 dtype_map.insert("amount".to_owned(), DType::Float64);
21686 let frame = read_sql_with_options(
21687 &conn,
21688 "SELECT amount FROM amounts ORDER BY amount",
21689 &SqlReadOptions {
21690 params: None,
21691 parse_dates: None,
21692 coerce_float: false,
21693 dtype: Some(dtype_map),
21694 schema: None,
21695 columns: None,
21696 index_col: None,
21697 },
21698 )
21699 .expect("read with dtype");
21700 let col = frame.column("amount").expect("amount");
21701 assert_eq!(col.dtype(), DType::Float64);
21702 assert_eq!(col.values()[0], Scalar::Float64(1.0));
21703 assert_eq!(col.values()[2], Scalar::Float64(3.0));
21704 }
21705
/// Requesting `Bool` for a TEXT column holding 'yes'/'no' must fail with an
/// `IoError::Sql` whose message names both the column and the target dtype.
#[cfg(feature = "sql-sqlite")]
#[test]
fn read_sql_dtype_override_unsupported_cast_returns_typed_error() {
    let db = make_sql_test_conn();
    let setup = "CREATE TABLE labels (id TEXT); INSERT INTO labels VALUES ('yes'), ('no');";
    super::SqlConnection::execute_batch(&db, setup).unwrap();

    let options = SqlReadOptions {
        params: None,
        parse_dates: None,
        coerce_float: false,
        dtype: Some(BTreeMap::from([("id".to_owned(), DType::Bool)])),
        schema: None,
        columns: None,
        index_col: None,
    };
    let outcome = read_sql_with_options(&db, "SELECT id FROM labels ORDER BY id", &options);

    match outcome.expect_err("expected dtype override error") {
        IoError::Sql(message) => {
            // The message must identify the offending column...
            assert!(
                message.contains("dtype override on column 'id'"),
                "unexpected error message: {message}"
            );
            // ...and the dtype that could not be applied.
            assert!(
                message.contains("Bool"),
                "unexpected error message: {message}"
            );
        }
        other => unreachable!("expected IoError::Sql, got {other:?}"),
    }
}
21748
/// A `dtype` entry naming a column absent from the result set is not an
/// error, and the columns that are present keep their inferred dtype.
#[cfg(feature = "sql-sqlite")]
#[test]
fn read_sql_dtype_override_missing_column_is_ignored() {
    let db = make_sql_test_conn();
    let setup = "CREATE TABLE t (x INTEGER); INSERT INTO t VALUES (1);";
    super::SqlConnection::execute_batch(&db, setup).unwrap();

    // The override targets a column the query never returns.
    let overrides = BTreeMap::from([("nonexistent".to_owned(), DType::Float64)]);
    let options = SqlReadOptions {
        params: None,
        parse_dates: None,
        coerce_float: false,
        dtype: Some(overrides),
        schema: None,
        columns: None,
        index_col: None,
    };
    let frame = read_sql_with_options(&db, "SELECT x FROM t", &options)
        .expect("read with dtype-on-missing-col");

    let x = frame.column("x").expect("x");
    assert_eq!(x.dtype(), DType::Int64);
}
21777
/// Applying a `Float64` override to a column containing SQL NULL keeps the
/// NULL row as a missing value while the column dtype becomes `Float64`.
#[cfg(feature = "sql-sqlite")]
#[test]
fn read_sql_dtype_override_preserves_nulls() {
    let db = make_sql_test_conn();
    let setup =
        "CREATE TABLE nulls_tbl (v INTEGER); INSERT INTO nulls_tbl VALUES (1), (NULL), (3);";
    super::SqlConnection::execute_batch(&db, setup).unwrap();

    let overrides = BTreeMap::from([("v".to_owned(), DType::Float64)]);
    let options = SqlReadOptions {
        params: None,
        parse_dates: None,
        coerce_float: false,
        dtype: Some(overrides),
        schema: None,
        columns: None,
        index_col: None,
    };
    // ORDER BY rowid keeps insertion order, so the NULL sits at position 1.
    let frame = read_sql_with_options(&db, "SELECT v FROM nulls_tbl ORDER BY rowid", &options)
        .expect("read with dtype + nulls");

    let col = frame.column("v").expect("v");
    assert_eq!(col.dtype(), DType::Float64);
    assert!(col.values()[1].is_missing());
}
21807
/// When the same column appears in both `parse_dates` and `dtype`, the
/// `Float64` override is not applied: the read succeeds and the test expects
/// the column to come back as `Utf8`.
#[cfg(feature = "sql-sqlite")]
#[test]
fn read_sql_dtype_skipped_when_column_in_parse_dates() {
    let db = make_sql_test_conn();
    let setup = "CREATE TABLE evt (ts TEXT); INSERT INTO evt VALUES ('2024-01-01 00:00:00');";
    super::SqlConnection::execute_batch(&db, setup).unwrap();

    // Both options target `ts`.
    let date_columns = vec!["ts".to_owned()];
    let overrides = BTreeMap::from([("ts".to_owned(), DType::Float64)]);
    let options = SqlReadOptions {
        params: None,
        parse_dates: Some(date_columns),
        coerce_float: false,
        dtype: Some(overrides),
        schema: None,
        columns: None,
        index_col: None,
    };
    let frame = read_sql_with_options(&db, "SELECT ts FROM evt", &options)
        .expect("read with parse_dates priority");

    let ts = frame.column("ts").expect("ts");
    assert_eq!(ts.dtype(), DType::Utf8);
}
21836
/// The SQLite test connection reports no schema support and no default schema.
#[cfg(feature = "sql-sqlite")]
#[test]
fn rusqlite_does_not_support_schemas_by_default() {
    let conn = make_sql_test_conn();
    let schema_capable = super::SqlConnection::supports_schemas(&conn);
    let fallback_schema = super::SqlConnection::default_schema(&conn);
    assert!(!schema_capable);
    assert_eq!(fallback_schema, None);
}
21846
/// A connection implementing only the required trait methods inherits the
/// default probes: no schema support and no default schema.
#[test]
fn default_schema_probes_are_conservative() {
    struct StubSql;
    // Only the required methods are provided; the schema probes are left to
    // the trait's defaults, which is what this test exercises.
    impl super::SqlConnection for StubSql {
        fn query(
            &self,
            _sql: &str,
            _params: &[Scalar],
        ) -> Result<super::SqlQueryResult, IoError> {
            let empty = super::SqlQueryResult {
                columns: Vec::new(),
                rows: Vec::new(),
            };
            Ok(empty)
        }
        fn execute_batch(&self, _statements: &str) -> Result<(), IoError> {
            Ok(())
        }
        fn table_exists(&self, _table: &str) -> Result<bool, IoError> {
            Ok(false)
        }
        fn insert_rows(&self, _statement: &str, _batch: &[Vec<Scalar>]) -> Result<(), IoError> {
            Ok(())
        }
        fn dtype_sql(&self, _kind: DType) -> &'static str {
            "TEXT"
        }
        fn index_dtype_sql(&self, _idx: &Index) -> &'static str {
            "TEXT"
        }
    }

    let stub = StubSql;
    let schema_capable = super::SqlConnection::supports_schemas(&stub);
    let fallback_schema = super::SqlConnection::default_schema(&stub);
    assert!(!schema_capable);
    assert_eq!(fallback_schema, None);
}
21880
/// Overriding `supports_schemas` and `default_schema` on an implementor is
/// observable through the trait: the overridden values are returned.
#[test]
fn schema_probe_overrides_take_effect() {
    struct PgLikeSqlConn;
    impl super::SqlConnection for PgLikeSqlConn {
        fn query(
            &self,
            _sql: &str,
            _params: &[Scalar],
        ) -> Result<super::SqlQueryResult, IoError> {
            let empty = super::SqlQueryResult {
                columns: Vec::new(),
                rows: Vec::new(),
            };
            Ok(empty)
        }
        fn execute_batch(&self, _statements: &str) -> Result<(), IoError> {
            Ok(())
        }
        fn table_exists(&self, _table: &str) -> Result<bool, IoError> {
            Ok(false)
        }
        fn insert_rows(&self, _statement: &str, _batch: &[Vec<Scalar>]) -> Result<(), IoError> {
            Ok(())
        }
        fn dtype_sql(&self, _kind: DType) -> &'static str {
            "TEXT"
        }
        fn index_dtype_sql(&self, _idx: &Index) -> &'static str {
            "TEXT"
        }
        // The two probes under test.
        fn supports_schemas(&self) -> bool {
            true
        }
        fn default_schema(&self) -> Option<String> {
            Some("public".to_owned())
        }
    }

    let conn = PgLikeSqlConn;
    assert!(super::SqlConnection::supports_schemas(&conn));
    let schema = super::SqlConnection::default_schema(&conn);
    assert_eq!(schema.as_deref(), Some("public"));
}
21923
/// With `schema = None`, the generated `SELECT *` references only the quoted
/// table name.
#[test]
fn sql_select_all_query_no_schema_uses_bare_table() {
    struct StubSql;
    impl super::SqlConnection for StubSql {
        fn query(
            &self,
            _sql: &str,
            _params: &[Scalar],
        ) -> Result<super::SqlQueryResult, IoError> {
            let empty = super::SqlQueryResult {
                columns: Vec::new(),
                rows: Vec::new(),
            };
            Ok(empty)
        }
        fn execute_batch(&self, _statements: &str) -> Result<(), IoError> {
            Ok(())
        }
        fn table_exists(&self, _table: &str) -> Result<bool, IoError> {
            Ok(false)
        }
        fn insert_rows(&self, _statement: &str, _batch: &[Vec<Scalar>]) -> Result<(), IoError> {
            Ok(())
        }
        fn dtype_sql(&self, _kind: DType) -> &'static str {
            "TEXT"
        }
        fn index_dtype_sql(&self, _idx: &Index) -> &'static str {
            "TEXT"
        }
    }

    let conn = StubSql;
    let generated = super::sql_select_all_query_in_schema(&conn, "users", None).expect("q1");
    assert_eq!(generated, r#"SELECT * FROM "users""#);
}
21956
/// On a backend that does not advertise schema support, both the `SELECT *`
/// and the projected-column query builders reject `schema = Some(..)` with an
/// error that includes the backend's dialect name.
#[test]
fn sql_select_query_with_schema_rejects_non_schema_backend() {
    struct StubSql;
    impl super::SqlConnection for StubSql {
        fn query(
            &self,
            _sql: &str,
            _params: &[Scalar],
        ) -> Result<super::SqlQueryResult, IoError> {
            let empty = super::SqlQueryResult {
                columns: Vec::new(),
                rows: Vec::new(),
            };
            Ok(empty)
        }
        fn execute_batch(&self, _statements: &str) -> Result<(), IoError> {
            Ok(())
        }
        fn table_exists(&self, _table: &str) -> Result<bool, IoError> {
            Ok(false)
        }
        fn insert_rows(&self, _statement: &str, _batch: &[Vec<Scalar>]) -> Result<(), IoError> {
            Ok(())
        }
        fn dtype_sql(&self, _kind: DType) -> &'static str {
            "TEXT"
        }
        fn index_dtype_sql(&self, _idx: &Index) -> &'static str {
            "TEXT"
        }
        // The dialect name is expected to appear in the rejection message.
        fn dialect_name(&self) -> &'static str {
            "stub"
        }
    }

    let conn = StubSql;
    let requested_schema = Some("analytics");

    // Unprojected select.
    let err = super::sql_select_all_query_in_schema(&conn, "users", requested_schema)
        .expect_err("schema must reject when backend has no schema support");
    assert!(
        matches!(err, IoError::Sql(msg) if msg.contains("schema is not supported by stub backend"))
    );

    // Projected select.
    let err =
        super::sql_select_columns_query_in_schema(&conn, "users", requested_schema, &["id"])
            .expect_err("projected schema select must reject too");
    assert!(
        matches!(err, IoError::Sql(msg) if msg.contains("schema is not supported by stub backend"))
    );
}
22000
/// On a backend reporting schema support, `SELECT *` is qualified as
/// `"schema"."table"` when a schema is given and stays bare when it is `None`.
#[test]
fn sql_select_all_query_with_schema_qualifies_on_multi_schema_backend() {
    struct PgLikeSchemaSql;
    impl super::SqlConnection for PgLikeSchemaSql {
        fn query(
            &self,
            _sql: &str,
            _params: &[Scalar],
        ) -> Result<super::SqlQueryResult, IoError> {
            let empty = super::SqlQueryResult {
                columns: Vec::new(),
                rows: Vec::new(),
            };
            Ok(empty)
        }
        fn execute_batch(&self, _statements: &str) -> Result<(), IoError> {
            Ok(())
        }
        fn table_exists(&self, _table: &str) -> Result<bool, IoError> {
            Ok(false)
        }
        fn insert_rows(&self, _statement: &str, _batch: &[Vec<Scalar>]) -> Result<(), IoError> {
            Ok(())
        }
        fn dtype_sql(&self, _kind: DType) -> &'static str {
            "TEXT"
        }
        fn index_dtype_sql(&self, _idx: &Index) -> &'static str {
            "TEXT"
        }
        fn supports_schemas(&self) -> bool {
            true
        }
    }

    let conn = PgLikeSchemaSql;

    let qualified =
        super::sql_select_all_query_in_schema(&conn, "users", Some("analytics")).expect("q");
    assert_eq!(qualified, r#"SELECT * FROM "analytics"."users""#);

    let unqualified = super::sql_select_all_query_in_schema(&conn, "users", None).expect("bare");
    assert_eq!(unqualified, r#"SELECT * FROM "users""#);
}
22037
/// `read_sql_table_with_options` with `schema = Some(..)` is rejected on the
/// SQLite backend with a "schema is not supported" error.
#[cfg(feature = "sql-sqlite")]
#[test]
fn read_sql_table_with_options_schema_rejected_on_sqlite() {
    let db = make_sql_test_conn();
    let setup = "CREATE TABLE bare_tbl (x INTEGER); INSERT INTO bare_tbl VALUES (1), (2);";
    super::SqlConnection::execute_batch(&db, setup).unwrap();

    // Everything is left at "off" except the schema under test.
    let options = SqlReadOptions {
        params: None,
        parse_dates: None,
        coerce_float: false,
        dtype: None,
        schema: Some("ignored_on_sqlite".to_owned()),
        columns: None,
        index_col: None,
    };
    let err = read_sql_table_with_options(&db, "bare_tbl", &options)
        .expect_err("read_sql_table schema=Some must reject on SQLite");

    assert!(
        matches!(err, IoError::Sql(msg) if msg.contains("schema is not supported by sqlite backend"))
    );
}
22065
/// The chunked table reader rejects `schema = Some(..)` on SQLite with the
/// same "schema is not supported" error.
#[cfg(feature = "sql-sqlite")]
#[test]
fn read_sql_table_chunks_with_options_schema_rejected_on_sqlite() {
    let db = make_sql_test_conn();
    let setup =
        "CREATE TABLE chunk_bare_tbl (x INTEGER); INSERT INTO chunk_bare_tbl VALUES (1), (2);";
    super::SqlConnection::execute_batch(&db, setup).unwrap();

    // Only the schema deviates from the defaults.
    let options = SqlReadOptions {
        schema: Some("ignored_on_sqlite".to_owned()),
        ..SqlReadOptions::default()
    };
    let err = read_sql_table_chunks_with_options(&db, "chunk_bare_tbl", &options, 1)
        .expect_err("chunked read_sql_table schema=Some must reject on SQLite");

    assert!(
        matches!(err, IoError::Sql(msg) if msg.contains("schema is not supported by sqlite backend"))
    );
}
22089
/// Even on a schema-capable backend, a malformed schema name such as
/// `"evil; DROP"` is rejected by the `SELECT *` builder with an
/// "invalid schema name" error.
#[test]
fn sql_select_all_query_in_schema_validates_schema_name() {
    struct PgLikeValidate;
    impl super::SqlConnection for PgLikeValidate {
        fn query(
            &self,
            _sql: &str,
            _params: &[Scalar],
        ) -> Result<super::SqlQueryResult, IoError> {
            let empty = super::SqlQueryResult {
                columns: Vec::new(),
                rows: Vec::new(),
            };
            Ok(empty)
        }
        fn execute_batch(&self, _statements: &str) -> Result<(), IoError> {
            Ok(())
        }
        fn table_exists(&self, _table: &str) -> Result<bool, IoError> {
            Ok(false)
        }
        fn insert_rows(&self, _statement: &str, _batch: &[Vec<Scalar>]) -> Result<(), IoError> {
            Ok(())
        }
        fn dtype_sql(&self, _kind: DType) -> &'static str {
            "TEXT"
        }
        fn index_dtype_sql(&self, _idx: &Index) -> &'static str {
            "TEXT"
        }
        fn supports_schemas(&self) -> bool {
            true
        }
    }

    let conn = PgLikeValidate;
    let hostile_schema = Some("evil; DROP");
    let err = super::sql_select_all_query_in_schema(&conn, "users", hostile_schema)
        .expect_err("malformed schema must reject");
    assert!(matches!(err, IoError::Sql(msg) if msg.contains("invalid schema name")));
}
22126
/// On a schema-capable backend, `CREATE TABLE IF NOT EXISTS` is emitted with a
/// `"schema"."table"` target when a schema is given and a bare quoted table
/// otherwise; the column definitions are joined with ", ".
#[test]
fn sql_create_table_query_in_schema_qualifies_on_multi_schema_backend() {
    struct PgLikeWrite;
    impl super::SqlConnection for PgLikeWrite {
        fn query(
            &self,
            _sql: &str,
            _params: &[Scalar],
        ) -> Result<super::SqlQueryResult, IoError> {
            let empty = super::SqlQueryResult {
                columns: Vec::new(),
                rows: Vec::new(),
            };
            Ok(empty)
        }
        fn execute_batch(&self, _statements: &str) -> Result<(), IoError> {
            Ok(())
        }
        fn table_exists(&self, _table: &str) -> Result<bool, IoError> {
            Ok(false)
        }
        fn insert_rows(&self, _statement: &str, _batch: &[Vec<Scalar>]) -> Result<(), IoError> {
            Ok(())
        }
        fn dtype_sql(&self, _kind: DType) -> &'static str {
            "TEXT"
        }
        fn index_dtype_sql(&self, _idx: &Index) -> &'static str {
            "TEXT"
        }
        fn supports_schemas(&self) -> bool {
            true
        }
    }

    let conn = PgLikeWrite;
    let column_defs = vec![String::from("id INTEGER"), String::from("name TEXT")];

    let qualified =
        super::sql_create_table_query_in_schema(&conn, "users", Some("analytics"), &column_defs)
            .expect("create");
    assert_eq!(
        qualified,
        r#"CREATE TABLE IF NOT EXISTS "analytics"."users" (id INTEGER, name TEXT)"#
    );

    let unqualified = super::sql_create_table_query_in_schema(&conn, "users", None, &column_defs)
        .expect("bare");
    assert_eq!(
        unqualified,
        r#"CREATE TABLE IF NOT EXISTS "users" (id INTEGER, name TEXT)"#
    );
}
22173
/// On a schema-capable backend, the single-row `INSERT` statement targets
/// `"schema"."table"`, quotes each column name, and emits one `?` marker per
/// column.
#[test]
fn sql_insert_rows_query_in_schema_qualifies_on_multi_schema_backend() {
    struct PgLikeInsert;
    impl super::SqlConnection for PgLikeInsert {
        fn query(
            &self,
            _sql: &str,
            _params: &[Scalar],
        ) -> Result<super::SqlQueryResult, IoError> {
            let empty = super::SqlQueryResult {
                columns: Vec::new(),
                rows: Vec::new(),
            };
            Ok(empty)
        }
        fn execute_batch(&self, _statements: &str) -> Result<(), IoError> {
            Ok(())
        }
        fn table_exists(&self, _table: &str) -> Result<bool, IoError> {
            Ok(false)
        }
        fn insert_rows(&self, _statement: &str, _batch: &[Vec<Scalar>]) -> Result<(), IoError> {
            Ok(())
        }
        fn dtype_sql(&self, _kind: DType) -> &'static str {
            "TEXT"
        }
        fn index_dtype_sql(&self, _idx: &Index) -> &'static str {
            "TEXT"
        }
        fn supports_schemas(&self) -> bool {
            true
        }
    }

    let conn = PgLikeInsert;
    let column_names = vec![String::from("id"), String::from("name")];
    let statement =
        super::sql_insert_rows_query_in_schema(&conn, "users", Some("analytics"), &column_names)
            .expect("insert");
    assert_eq!(
        statement,
        r#"INSERT INTO "analytics"."users" ("id", "name") VALUES (?, ?)"#
    );
}
22212
/// Writing with `schema = Some(..)` on SQLite succeeds, and the rows can be
/// read back from the unqualified table name.
#[cfg(feature = "sql-sqlite")]
#[test]
fn write_sql_with_options_schema_silently_ignored_on_sqlite() {
    let db = make_sql_test_conn();
    let x_values = vec![Scalar::Int64(1), Scalar::Int64(2)];
    let frame = fp_frame::DataFrame::from_dict(&["x"], vec![("x", x_values)]).unwrap();

    let options = SqlWriteOptions {
        if_exists: SqlIfExists::Fail,
        index: false,
        index_label: None,
        schema: Some("ignored_on_sqlite".to_owned()),
        dtype: None,
        method: SqlInsertMethod::Single,
        chunksize: None,
    };
    write_sql_with_options(&frame, &db, "bare_write_tbl", &options)
        .expect("write with schema=Some on SQLite");

    // Read through the bare table name to confirm where the data landed.
    let round_tripped = read_sql_table(&db, "bare_write_tbl").expect("read");
    let x = round_tripped.column("x").expect("x");
    assert_eq!(x.values()[0], Scalar::Int64(1));
    assert_eq!(x.values()[1], Scalar::Int64(2));
}
22244
/// The `CREATE TABLE` builder rejects a malformed schema name such as
/// `"evil; DROP"` with an "invalid schema name" error, even on a
/// schema-capable backend.
#[test]
fn sql_create_table_query_in_schema_validates_schema_name() {
    struct PgLikeValidate;
    impl super::SqlConnection for PgLikeValidate {
        fn query(
            &self,
            _sql: &str,
            _params: &[Scalar],
        ) -> Result<super::SqlQueryResult, IoError> {
            let empty = super::SqlQueryResult {
                columns: Vec::new(),
                rows: Vec::new(),
            };
            Ok(empty)
        }
        fn execute_batch(&self, _statements: &str) -> Result<(), IoError> {
            Ok(())
        }
        fn table_exists(&self, _table: &str) -> Result<bool, IoError> {
            Ok(false)
        }
        fn insert_rows(&self, _statement: &str, _batch: &[Vec<Scalar>]) -> Result<(), IoError> {
            Ok(())
        }
        fn dtype_sql(&self, _kind: DType) -> &'static str {
            "TEXT"
        }
        fn index_dtype_sql(&self, _idx: &Index) -> &'static str {
            "TEXT"
        }
        fn supports_schemas(&self) -> bool {
            true
        }
    }

    let conn = PgLikeValidate;
    let column_defs = vec![String::from("x INTEGER")];
    let hostile_schema = Some("evil; DROP");
    let err = super::sql_create_table_query_in_schema(&conn, "users", hostile_schema, &column_defs)
        .expect_err("malformed schema must reject");
    assert!(matches!(err, IoError::Sql(msg) if msg.contains("invalid schema name")));
}
22283
/// On a backend without schema support, `DROP TABLE IF EXISTS` always names
/// the bare quoted table, whether or not a schema argument was supplied.
#[test]
fn sql_drop_table_query_bare_on_non_multi_schema() {
    struct StubSql;
    impl super::SqlConnection for StubSql {
        fn query(
            &self,
            _sql: &str,
            _params: &[Scalar],
        ) -> Result<super::SqlQueryResult, IoError> {
            let empty = super::SqlQueryResult {
                columns: Vec::new(),
                rows: Vec::new(),
            };
            Ok(empty)
        }
        fn execute_batch(&self, _statements: &str) -> Result<(), IoError> {
            Ok(())
        }
        fn table_exists(&self, _table: &str) -> Result<bool, IoError> {
            Ok(false)
        }
        fn insert_rows(&self, _statement: &str, _batch: &[Vec<Scalar>]) -> Result<(), IoError> {
            Ok(())
        }
        fn dtype_sql(&self, _kind: DType) -> &'static str {
            "TEXT"
        }
        fn index_dtype_sql(&self, _idx: &Index) -> &'static str {
            "TEXT"
        }
    }

    let conn = StubSql;
    let expected = r#"DROP TABLE IF EXISTS "users""#;

    let without_schema =
        super::sql_drop_table_query_in_schema(&conn, "users", None).expect("drop none");
    assert_eq!(without_schema, expected);

    // A supplied schema does not change the statement on this backend.
    let with_schema =
        super::sql_drop_table_query_in_schema(&conn, "users", Some("ignored")).expect("drop");
    assert_eq!(with_schema, expected);
}
22320
/// On a schema-capable backend, `DROP TABLE IF EXISTS` targets
/// `"schema"."table"` when a schema is given and the bare table otherwise.
#[test]
fn sql_drop_table_query_qualifies_on_multi_schema_backend() {
    struct PgLikeDrop;
    impl super::SqlConnection for PgLikeDrop {
        fn query(
            &self,
            _sql: &str,
            _params: &[Scalar],
        ) -> Result<super::SqlQueryResult, IoError> {
            let empty = super::SqlQueryResult {
                columns: Vec::new(),
                rows: Vec::new(),
            };
            Ok(empty)
        }
        fn execute_batch(&self, _statements: &str) -> Result<(), IoError> {
            Ok(())
        }
        fn table_exists(&self, _table: &str) -> Result<bool, IoError> {
            Ok(false)
        }
        fn insert_rows(&self, _statement: &str, _batch: &[Vec<Scalar>]) -> Result<(), IoError> {
            Ok(())
        }
        fn dtype_sql(&self, _kind: DType) -> &'static str {
            "TEXT"
        }
        fn index_dtype_sql(&self, _idx: &Index) -> &'static str {
            "TEXT"
        }
        fn supports_schemas(&self) -> bool {
            true
        }
    }

    let conn = PgLikeDrop;

    let qualified = super::sql_drop_table_query_in_schema(&conn, "users", Some("analytics"))
        .expect("drop qualified");
    assert_eq!(qualified, r#"DROP TABLE IF EXISTS "analytics"."users""#);

    let unqualified =
        super::sql_drop_table_query_in_schema(&conn, "users", None).expect("drop bare");
    assert_eq!(unqualified, r#"DROP TABLE IF EXISTS "users""#);
}
22357
/// `SqlIfExists::Replace` combined with `schema = Some(..)` on SQLite replaces
/// the existing unqualified table: the old row is gone and only the frame's
/// two rows remain.
#[cfg(feature = "sql-sqlite")]
#[test]
fn write_sql_replace_with_schema_silently_ignored_on_sqlite() {
    let db = make_sql_test_conn();
    // Seed a table whose single row (99) must not survive the replace.
    let seed = "CREATE TABLE replace_tbl (x INTEGER); INSERT INTO replace_tbl VALUES (99);";
    super::SqlConnection::execute_batch(&db, seed).unwrap();

    let x_values = vec![Scalar::Int64(1), Scalar::Int64(2)];
    let frame = fp_frame::DataFrame::from_dict(&["x"], vec![("x", x_values)]).unwrap();

    let options = SqlWriteOptions {
        if_exists: SqlIfExists::Replace,
        index: false,
        index_label: None,
        schema: Some("ignored_on_sqlite".to_owned()),
        dtype: None,
        method: SqlInsertMethod::Single,
        chunksize: None,
    };
    write_sql_with_options(&frame, &db, "replace_tbl", &options)
        .expect("replace + schema=Some on SQLite");

    let round_tripped = read_sql_table(&db, "replace_tbl").expect("read");
    let x = round_tripped.column("x").expect("x");
    assert_eq!(x.values().len(), 2);
    assert_eq!(x.values()[0], Scalar::Int64(1));
    assert_eq!(x.values()[1], Scalar::Int64(2));
}
22397
/// When an implementor does not override `table_exists_in_schema`, the result
/// tracks `table_exists` for the same table name, and the schema argument
/// does not change the answer.
#[test]
fn default_table_exists_in_schema_delegates_to_table_exists() {
    struct StubExistsTrue;
    impl super::SqlConnection for StubExistsTrue {
        fn query(
            &self,
            _sql: &str,
            _params: &[Scalar],
        ) -> Result<super::SqlQueryResult, IoError> {
            let empty = super::SqlQueryResult {
                columns: Vec::new(),
                rows: Vec::new(),
            };
            Ok(empty)
        }
        fn execute_batch(&self, _statements: &str) -> Result<(), IoError> {
            Ok(())
        }
        // Only "users" is reported as existing.
        fn table_exists(&self, table: &str) -> Result<bool, IoError> {
            Ok(matches!(table, "users"))
        }
        fn insert_rows(&self, _statement: &str, _batch: &[Vec<Scalar>]) -> Result<(), IoError> {
            Ok(())
        }
        fn dtype_sql(&self, _kind: DType) -> &'static str {
            "TEXT"
        }
        fn index_dtype_sql(&self, _idx: &Index) -> &'static str {
            "TEXT"
        }
    }

    let conn = StubExistsTrue;
    assert!(super::SqlConnection::table_exists_in_schema(&conn, "users", None).unwrap());
    // A schema argument makes no difference to the default implementation.
    assert!(
        super::SqlConnection::table_exists_in_schema(&conn, "users", Some("ignored")).unwrap()
    );
    assert!(!super::SqlConnection::table_exists_in_schema(&conn, "missing", None).unwrap());
}
22436
/// An implementor's own `table_exists_in_schema` is what callers observe: the
/// table is reported present only for the exact (table, schema) pair the
/// override recognises, regardless of what `table_exists` returns.
#[test]
fn multi_schema_override_scopes_table_exists() {
    struct PgLikeSchemaCheck;
    impl super::SqlConnection for PgLikeSchemaCheck {
        fn query(
            &self,
            _sql: &str,
            _params: &[Scalar],
        ) -> Result<super::SqlQueryResult, IoError> {
            let empty = super::SqlQueryResult {
                columns: Vec::new(),
                rows: Vec::new(),
            };
            Ok(empty)
        }
        fn execute_batch(&self, _statements: &str) -> Result<(), IoError> {
            Ok(())
        }
        // Always false, so a `true` below can only come from the override.
        fn table_exists(&self, _table: &str) -> Result<bool, IoError> {
            Ok(false)
        }
        fn insert_rows(&self, _statement: &str, _batch: &[Vec<Scalar>]) -> Result<(), IoError> {
            Ok(())
        }
        fn dtype_sql(&self, _kind: DType) -> &'static str {
            "TEXT"
        }
        fn index_dtype_sql(&self, _idx: &Index) -> &'static str {
            "TEXT"
        }
        fn supports_schemas(&self) -> bool {
            true
        }
        fn table_exists_in_schema(
            &self,
            table: &str,
            schema: Option<&str>,
        ) -> Result<bool, IoError> {
            Ok(matches!((table, schema), ("users", Some("analytics"))))
        }
    }

    let conn = PgLikeSchemaCheck;
    // Matching schema: present.
    assert!(
        super::SqlConnection::table_exists_in_schema(&conn, "users", Some("analytics"))
            .unwrap()
    );
    // Different schema: absent.
    assert!(
        !super::SqlConnection::table_exists_in_schema(&conn, "users", Some("audit")).unwrap()
    );
    // No schema: absent.
    assert!(!super::SqlConnection::table_exists_in_schema(&conn, "users", None).unwrap());
}
22488
/// With `SqlIfExists::Fail`, writing to a table that already exists on SQLite
/// is rejected with an "already exists" error even when `schema = Some(..)`.
#[cfg(feature = "sql-sqlite")]
#[test]
fn write_sql_fail_with_schema_some_still_rejects_existing_on_sqlite() {
    let db = make_sql_test_conn();
    // The target table is created up front so the write has something to collide with.
    super::SqlConnection::execute_batch(&db, "CREATE TABLE preexists_tbl (x INTEGER);").unwrap();

    let frame =
        fp_frame::DataFrame::from_dict(&["x"], vec![("x", vec![Scalar::Int64(1)])]).unwrap();
    let options = SqlWriteOptions {
        if_exists: SqlIfExists::Fail,
        index: false,
        index_label: None,
        schema: Some("ignored_on_sqlite".to_owned()),
        dtype: None,
        method: SqlInsertMethod::Single,
        chunksize: None,
    };

    let err = write_sql_with_options(&frame, &db, "preexists_tbl", &options)
        .expect_err("Fail branch must still reject pre-existing");
    assert!(matches!(err, IoError::Sql(msg) if msg.contains("already exists")));
}
22516
/// A per-column SQL type supplied through `SqlWriteOptions::dtype` appears
/// verbatim in the `CREATE TABLE` text that SQLite records in `sqlite_master`.
#[cfg(feature = "sql-sqlite")]
#[test]
fn write_sql_dtype_override_emits_custom_sql_type() {
    let db = make_sql_test_conn();
    let amounts = vec![Scalar::Int64(100), Scalar::Int64(250)];
    let frame = fp_frame::DataFrame::from_dict(&["amount"], vec![("amount", amounts)]).unwrap();

    let overrides = BTreeMap::from([("amount".to_owned(), "NUMERIC(10,2)".to_owned())]);
    let options = SqlWriteOptions {
        if_exists: SqlIfExists::Fail,
        index: false,
        index_label: None,
        schema: None,
        dtype: Some(overrides),
        method: SqlInsertMethod::Single,
        chunksize: None,
    };
    write_sql_with_options(&frame, &db, "money_tbl", &options).expect("write with dtype override");

    // Inspect the DDL that SQLite stored for the new table.
    let sm = super::SqlConnection::query(
        &db,
        "SELECT sql FROM sqlite_master WHERE name = 'money_tbl'",
        &[],
    )
    .unwrap();
    let other = &sm.rows[0][0];
    let Scalar::Utf8(create_sql) = other else {
        unreachable!("unexpected sqlite_master payload: {other:?}")
    };
    assert!(
        create_sql.contains("NUMERIC(10,2)"),
        "expected override to land in CREATE TABLE; got: {create_sql}"
    );
}
22563
/// SQL type overrides for several columns all show up in the stored
/// `CREATE TABLE` statement.
#[cfg(feature = "sql-sqlite")]
#[test]
fn write_sql_dtype_override_multiple_columns() {
    let db = make_sql_test_conn();
    let columns = vec![
        ("a", vec![Scalar::Int64(1)]),
        ("b", vec![Scalar::Float64(1.5)]),
    ];
    let frame = fp_frame::DataFrame::from_dict(&["a", "b"], columns).unwrap();

    let overrides = BTreeMap::from([
        ("a".to_owned(), "BIGINT".to_owned()),
        ("b".to_owned(), "DECIMAL(8,4)".to_owned()),
    ]);
    let options = SqlWriteOptions {
        if_exists: SqlIfExists::Fail,
        index: false,
        index_label: None,
        schema: None,
        dtype: Some(overrides),
        method: SqlInsertMethod::Single,
        chunksize: None,
    };
    write_sql_with_options(&frame, &db, "multi_tbl", &options)
        .expect("write with multi-column overrides");

    // Inspect the DDL that SQLite stored for the new table.
    let sm = super::SqlConnection::query(
        &db,
        "SELECT sql FROM sqlite_master WHERE name = 'multi_tbl'",
        &[],
    )
    .unwrap();
    let other = &sm.rows[0][0];
    let Scalar::Utf8(create_sql) = other else {
        unreachable!("unexpected sqlite_master payload: {other:?}")
    };
    assert!(create_sql.contains("BIGINT"));
    assert!(create_sql.contains("DECIMAL(8,4)"));
}
22607
/// An override keyed by a column the frame does not contain neither fails the
/// write nor leaks into the DDL; the real column keeps its inferred type.
#[cfg(feature = "sql-sqlite")]
#[test]
fn write_sql_dtype_override_for_missing_column_silently_ignored() {
    let db = make_sql_test_conn();
    let frame =
        fp_frame::DataFrame::from_dict(&["x"], vec![("x", vec![Scalar::Int64(1)])]).unwrap();

    // The override names a column that is not in the frame.
    let overrides = BTreeMap::from([("nonexistent".to_owned(), "BIGINT".to_owned())]);
    let options = SqlWriteOptions {
        if_exists: SqlIfExists::Fail,
        index: false,
        index_label: None,
        schema: None,
        dtype: Some(overrides),
        method: SqlInsertMethod::Single,
        chunksize: None,
    };
    write_sql_with_options(&frame, &db, "missing_col_tbl", &options)
        .expect("write with override on missing col");

    // Inspect the DDL that SQLite stored for the new table.
    let sm = super::SqlConnection::query(
        &db,
        "SELECT sql FROM sqlite_master WHERE name = 'missing_col_tbl'",
        &[],
    )
    .unwrap();
    let other = &sm.rows[0][0];
    let Scalar::Utf8(create_sql) = other else {
        unreachable!("unexpected sqlite_master payload: {other:?}")
    };
    assert!(create_sql.contains("INTEGER"));
    assert!(!create_sql.contains("BIGINT"));
}
22646
/// With no `dtype` overrides, an `Int64` column is created with a type whose
/// DDL text contains "INTEGER".
#[cfg(feature = "sql-sqlite")]
#[test]
fn write_sql_dtype_none_falls_back_to_inferred_type() {
    let db = make_sql_test_conn();
    let frame =
        fp_frame::DataFrame::from_dict(&["x"], vec![("x", vec![Scalar::Int64(1)])]).unwrap();

    let options = SqlWriteOptions {
        if_exists: SqlIfExists::Fail,
        index: false,
        index_label: None,
        schema: None,
        dtype: None,
        method: SqlInsertMethod::Single,
        chunksize: None,
    };
    write_sql_with_options(&frame, &db, "no_override_tbl", &options)
        .expect("write without override");

    // Inspect the DDL that SQLite stored for the new table.
    let sm = super::SqlConnection::query(
        &db,
        "SELECT sql FROM sqlite_master WHERE name = 'no_override_tbl'",
        &[],
    )
    .unwrap();
    let other = &sm.rows[0][0];
    let Scalar::Utf8(create_sql) = other else {
        unreachable!("unexpected sqlite_master payload: {other:?}")
    };
    assert!(create_sql.contains("INTEGER"));
}
22681
/// Writing the same frame with `SqlInsertMethod::Single` and
/// `SqlInsertMethod::Multi` (each into its own fresh connection) must read
/// back to identical column names and identical column values.
#[cfg(feature = "sql-sqlite")]
#[test]
fn write_sql_multi_round_trip_matches_single() {
    let ids = vec![Scalar::Int64(1), Scalar::Int64(2), Scalar::Int64(3)];
    let names = vec![
        Scalar::Utf8("alice".into()),
        Scalar::Utf8("bob".into()),
        Scalar::Utf8("carol".into()),
    ];
    let amounts = vec![
        Scalar::Float64(1.5),
        Scalar::Float64(2.5),
        Scalar::Float64(3.5),
    ];
    let frame = fp_frame::DataFrame::from_dict(
        &["id", "name", "amount"],
        vec![("id", ids), ("name", names), ("amount", amounts)],
    )
    .unwrap();

    // The two writes differ only in the insert method.
    let options_with = |method: SqlInsertMethod| SqlWriteOptions {
        if_exists: SqlIfExists::Fail,
        index: false,
        index_label: None,
        schema: None,
        dtype: None,
        method,
        chunksize: None,
    };

    let conn_single = make_sql_test_conn();
    let single_options = options_with(SqlInsertMethod::Single);
    write_sql_with_options(&frame, &conn_single, "single_tbl", &single_options).unwrap();
    let single = read_sql(&conn_single, "SELECT * FROM single_tbl ORDER BY id").unwrap();

    let conn_multi = make_sql_test_conn();
    let multi_options = options_with(SqlInsertMethod::Multi);
    write_sql_with_options(&frame, &conn_multi, "multi_tbl", &multi_options).unwrap();
    let multi = read_sql(&conn_multi, "SELECT * FROM multi_tbl ORDER BY id").unwrap();

    assert_eq!(single.column_names(), multi.column_names());
    for name in single.column_names() {
        let from_single = single.column(name).unwrap().values().to_vec();
        let from_multi = multi.column(name).unwrap().values().to_vec();
        assert_eq!(
            from_single, from_multi,
            "column {name} diverged between Single and Multi"
        );
    }
}
22760
/// For 2 columns and 3 rows, the multi-row insert builder asks the connection
/// for markers numbered 1 through 6 and groups them two per row.
#[test]
fn sql_multi_row_insert_query_emits_correct_placeholder_count() {
    struct PgLikeStub;
    impl super::SqlConnection for PgLikeStub {
        fn query(&self, _sql: &str, _params: &[Scalar]) -> Result<SqlQueryResult, IoError> {
            let empty = SqlQueryResult {
                columns: Vec::new(),
                rows: Vec::new(),
            };
            Ok(empty)
        }
        fn execute_batch(&self, _statements: &str) -> Result<(), IoError> {
            Ok(())
        }
        fn table_exists(&self, _table: &str) -> Result<bool, IoError> {
            Ok(false)
        }
        fn insert_rows(&self, _statement: &str, _batch: &[Vec<Scalar>]) -> Result<(), IoError> {
            Ok(())
        }
        fn dtype_sql(&self, _kind: DType) -> &'static str {
            "TEXT"
        }
        fn index_dtype_sql(&self, _idx: &Index) -> &'static str {
            "TEXT"
        }
        // Ordinal-numbered markers ($1, $2, ...) make the numbering visible in the output.
        fn parameter_marker(&self, ordinal: usize) -> String {
            format!("${ordinal}")
        }
    }

    let conn = PgLikeStub;
    let column_names = vec![String::from("a"), String::from("b")];
    let sql = super::sql_multi_row_insert_query_in_schema(&conn, "tbl", None, &column_names, 3)
        .unwrap();
    assert!(
        sql.contains("VALUES ($1, $2), ($3, $4), ($5, $6)"),
        "got: {sql}"
    );
}
22801
/// Building a multi-row INSERT for zero rows must fail with an `IoError::Sql`
/// whose message mentions "at least one row".
#[test]
fn sql_multi_row_insert_query_rejects_zero_rows() {
    /// Inert connection: every method returns an empty / trivial success.
    struct StubConn;

    impl super::SqlConnection for StubConn {
        fn query(&self, _: &str, _: &[Scalar]) -> Result<SqlQueryResult, IoError> {
            Ok(SqlQueryResult {
                columns: Vec::new(),
                rows: Vec::new(),
            })
        }

        fn execute_batch(&self, _: &str) -> Result<(), IoError> {
            Ok(())
        }

        fn table_exists(&self, _: &str) -> Result<bool, IoError> {
            Ok(false)
        }

        fn insert_rows(&self, _: &str, _: &[Vec<Scalar>]) -> Result<(), IoError> {
            Ok(())
        }

        fn dtype_sql(&self, _: DType) -> &'static str {
            "TEXT"
        }

        fn index_dtype_sql(&self, _: &Index) -> &'static str {
            "TEXT"
        }
    }

    let column_names = vec![String::from("a")];
    let outcome =
        super::sql_multi_row_insert_query_in_schema(&StubConn, "tbl", None, &column_names, 0);
    let err = outcome.expect_err("zero rows must be rejected");
    assert!(matches!(err, IoError::Sql(msg) if msg.contains("at least one row")));
}
22834
/// A backend advertising `max_param_count() == Some(4)` receives a 5-row,
/// 2-column frame written with `SqlInsertMethod::Multi` as three INSERT calls.
#[cfg(feature = "sql-sqlite")]
#[test]
fn write_sql_multi_chunks_at_max_param_boundary() {
    use std::cell::RefCell;

    /// Records every `insert_rows` call: the SQL text and the length of the
    /// first entry of `rows` (presumably the flattened parameter list of the
    /// statement -- confirm against the writer implementation).
    struct ChunkRecorder {
        statements: RefCell<Vec<String>>,
        first_row_lens: RefCell<Vec<usize>>,
    }

    impl super::SqlConnection for ChunkRecorder {
        fn query(&self, _: &str, _: &[Scalar]) -> Result<SqlQueryResult, IoError> {
            Ok(SqlQueryResult {
                columns: Vec::new(),
                rows: Vec::new(),
            })
        }

        fn execute_batch(&self, _: &str) -> Result<(), IoError> {
            Ok(())
        }

        fn table_exists(&self, _: &str) -> Result<bool, IoError> {
            Ok(false)
        }

        fn insert_rows(&self, sql: &str, rows: &[Vec<Scalar>]) -> Result<(), IoError> {
            let first_len = match rows.first() {
                Some(first) => first.len(),
                None => 0,
            };
            self.statements.borrow_mut().push(sql.to_owned());
            self.first_row_lens.borrow_mut().push(first_len);
            Ok(())
        }

        fn dtype_sql(&self, _: DType) -> &'static str {
            "TEXT"
        }

        fn index_dtype_sql(&self, _: &Index) -> &'static str {
            "TEXT"
        }

        fn max_param_count(&self) -> Option<usize> {
            Some(4)
        }
    }

    let conn = ChunkRecorder {
        statements: RefCell::new(Vec::new()),
        first_row_lens: RefCell::new(Vec::new()),
    };

    // a = 1..=5, b = 10, 20, ..., 50.
    let a_values: Vec<Scalar> = (1..=5).map(Scalar::Int64).collect();
    let b_values: Vec<Scalar> = (1..=5).map(|k| Scalar::Int64(k * 10)).collect();
    let frame =
        fp_frame::DataFrame::from_dict(&["a", "b"], vec![("a", a_values), ("b", b_values)])
            .unwrap();

    let options = SqlWriteOptions {
        if_exists: SqlIfExists::Fail,
        index: false,
        index_label: None,
        schema: None,
        dtype: None,
        method: SqlInsertMethod::Multi,
        chunksize: None,
    };
    write_sql_with_options(&frame, &conn, "chunked", &options).unwrap();

    let recorded_sql = conn.statements.borrow();
    let recorded_lens = conn.first_row_lens.borrow();
    assert_eq!(recorded_sql.len(), 3, "expected 3 chunked INSERTs");
    assert_eq!(recorded_lens.as_slice(), &[4, 4, 2]);
}
22929
/// When the backend reports no parameter limit (`max_param_count() == None`),
/// a `Multi` write of three single-column rows is sent as one INSERT
/// containing three `(?)` tuples.
#[cfg(feature = "sql-sqlite")]
#[test]
fn write_sql_multi_no_max_param_sends_single_statement() {
    use std::cell::RefCell;

    /// Captures the SQL text of each `insert_rows` call.
    struct UnboundedStub {
        statements: RefCell<Vec<String>>,
    }

    impl super::SqlConnection for UnboundedStub {
        fn query(&self, _: &str, _: &[Scalar]) -> Result<SqlQueryResult, IoError> {
            Ok(SqlQueryResult {
                columns: Vec::new(),
                rows: Vec::new(),
            })
        }

        fn execute_batch(&self, _: &str) -> Result<(), IoError> {
            Ok(())
        }

        fn table_exists(&self, _: &str) -> Result<bool, IoError> {
            Ok(false)
        }

        fn insert_rows(&self, sql: &str, _: &[Vec<Scalar>]) -> Result<(), IoError> {
            let mut log = self.statements.borrow_mut();
            log.push(sql.to_owned());
            Ok(())
        }

        fn dtype_sql(&self, _: DType) -> &'static str {
            "TEXT"
        }

        fn index_dtype_sql(&self, _: &Index) -> &'static str {
            "TEXT"
        }

        fn max_param_count(&self) -> Option<usize> {
            None
        }
    }

    let conn = UnboundedStub {
        statements: RefCell::new(Vec::new()),
    };

    let x_values: Vec<Scalar> = (1..=3).map(Scalar::Int64).collect();
    let frame = fp_frame::DataFrame::from_dict(&["x"], vec![("x", x_values)]).unwrap();

    let options = SqlWriteOptions {
        if_exists: SqlIfExists::Fail,
        index: false,
        index_label: None,
        schema: None,
        dtype: None,
        method: SqlInsertMethod::Multi,
        chunksize: None,
    };
    write_sql_with_options(&frame, &conn, "uncapped", &options).unwrap();

    let recorded = conn.statements.borrow();
    assert_eq!(recorded.len(), 1, "expected exactly one multi-row INSERT");

    let stmt = &recorded[0];
    let tuple_count = stmt.matches("(?)").count();
    assert_eq!(tuple_count, 3, "expected 3 row tuples in: {stmt}");
}
23002
/// Nulls written through the `Multi` insert path must read back as nulls, and
/// the neighbouring non-null values must be unchanged.
#[cfg(feature = "sql-sqlite")]
#[test]
fn write_sql_multi_preserves_nulls() {
    let a_values = vec![
        Scalar::Int64(1),
        Scalar::Null(NullKind::Null),
        Scalar::Int64(3),
    ];
    let b_values = vec![
        Scalar::Utf8("x".into()),
        Scalar::Utf8("y".into()),
        Scalar::Null(NullKind::Null),
    ];
    let frame =
        fp_frame::DataFrame::from_dict(&["a", "b"], vec![("a", a_values), ("b", b_values)])
            .unwrap();

    let conn = make_sql_test_conn();
    let options = SqlWriteOptions {
        if_exists: SqlIfExists::Fail,
        index: false,
        index_label: None,
        schema: None,
        dtype: None,
        method: SqlInsertMethod::Multi,
        chunksize: None,
    };
    write_sql_with_options(&frame, &conn, "nulls_tbl", &options).unwrap();

    // Read back in insertion order so positions line up with the input frame.
    let back = read_sql(&conn, "SELECT a, b FROM nulls_tbl ORDER BY rowid").unwrap();
    let a = back.column("a").unwrap().values();
    let b = back.column("b").unwrap().values();

    // Column `a`: value, NULL, value.
    assert_eq!(a[0], Scalar::Int64(1));
    assert!(matches!(a[1], Scalar::Null(_)));
    assert_eq!(a[2], Scalar::Int64(3));

    // Column `b`: value, value, NULL.
    assert_eq!(b[0], Scalar::Utf8("x".into()));
    assert_eq!(b[1], Scalar::Utf8("y".into()));
    assert!(matches!(b[2], Scalar::Null(_)));
}
23055
/// `SqlInsertMethod::default()` must be the `Single` variant.
#[test]
fn sql_insert_method_default_is_single() {
    let default_method = <SqlInsertMethod as Default>::default();
    assert_eq!(default_method, SqlInsertMethod::Single);
}
23060
/// A fresh test connection with no tables created lists no tables.
#[cfg(feature = "sql-sqlite")]
#[test]
fn list_sql_tables_empty_db_returns_empty_vec() {
    let conn = make_sql_test_conn();
    let listed = list_sql_tables(&conn, None).unwrap();
    assert!(listed.is_empty(), "expected no tables; got {listed:?}");
}
23070
/// Tables created out of alphabetical order are listed sorted by name.
#[cfg(feature = "sql-sqlite")]
#[test]
fn list_sql_tables_returns_user_tables_sorted() {
    let conn = make_sql_test_conn();

    // Deliberately created in non-alphabetical order.
    let ddl_statements = [
        "CREATE TABLE zebra (x INTEGER);",
        "CREATE TABLE alpha (y TEXT);",
        "CREATE TABLE mango (z REAL);",
    ];
    for ddl in ddl_statements {
        super::SqlConnection::execute_batch(&conn, ddl).unwrap();
    }

    let listed = list_sql_tables(&conn, None).unwrap();
    assert_eq!(listed, vec!["alpha", "mango", "zebra"]);
}
23081
/// Only the user table is listed; no name starting with `sqlite_` may appear.
/// The AUTOINCREMENT column plus an insert is used to provoke SQLite's
/// internal bookkeeping table (see the SQLite AUTOINCREMENT documentation).
#[cfg(feature = "sql-sqlite")]
#[test]
fn list_sql_tables_excludes_sqlite_internal_tables() {
    let conn = make_sql_test_conn();

    let setup = [
        "CREATE TABLE seq_demo (id INTEGER PRIMARY KEY AUTOINCREMENT, v TEXT);",
        "INSERT INTO seq_demo (v) VALUES ('one');",
    ];
    for sql in setup {
        super::SqlConnection::execute_batch(&conn, sql).unwrap();
    }

    let tables = list_sql_tables(&conn, None).unwrap();
    assert_eq!(tables, vec!["seq_demo"]);
    assert!(!tables.iter().any(|name| name.starts_with("sqlite_")));
}
23099
/// User tables whose names begin with `sqlite` but have no underscore after
/// it are still listed.
#[cfg(feature = "sql-sqlite")]
#[test]
fn list_sql_tables_keeps_user_tables_with_sqlite_prefix_no_underscore() {
    let conn = make_sql_test_conn();

    let ddl_statements = [
        "CREATE TABLE sqliteX (x INTEGER);",
        "CREATE TABLE sqliteY (y TEXT);",
        "CREATE TABLE sqlite1234 (z REAL);",
    ];
    for ddl in ddl_statements {
        super::SqlConnection::execute_batch(&conn, ddl).unwrap();
    }

    let listed = list_sql_tables(&conn, None).unwrap();
    assert_eq!(listed, vec!["sqlite1234", "sqliteX", "sqliteY"]);
}
23116
/// A user view named `sqliteX_view` is reported by `list_sql_views` even
/// though its name starts with `sqlite`.
#[cfg(feature = "sql-sqlite")]
#[test]
fn list_sql_views_keeps_user_views_with_sqlite_prefix_no_underscore() {
    let conn = make_sql_test_conn();

    // The view needs a base table to select from.
    let setup = [
        "CREATE TABLE base (x INTEGER);",
        "CREATE VIEW sqliteX_view AS SELECT x FROM base;",
    ];
    for sql in setup {
        super::SqlConnection::execute_batch(&conn, sql).unwrap();
    }

    let listed_views = list_sql_views(&conn, None).unwrap();
    assert_eq!(listed_views, vec!["sqliteX_view"]);
}
23134
/// Passing a schema name on the SQLite test connection neither errors nor
/// changes the listing compared with passing `None`.
#[cfg(feature = "sql-sqlite")]
#[test]
fn list_sql_tables_schema_silently_ignored_on_sqlite() {
    let conn = make_sql_test_conn();
    super::SqlConnection::execute_batch(&conn, "CREATE TABLE only_one (x INTEGER);").unwrap();

    let without_schema_arg = None;
    let with_schema_arg = Some("ignored_on_sqlite");

    let with_schema =
        list_sql_tables(&conn, with_schema_arg).expect("schema arg must not error");
    let without_schema = list_sql_tables(&conn, without_schema_arg).unwrap();
    assert_eq!(with_schema, without_schema);
}
23148
/// A connection that does not override `list_tables` yields an empty listing,
/// with or without a schema argument.
#[test]
fn list_sql_tables_default_impl_returns_empty() {
    /// Connection that supplies no table-listing override of its own.
    struct NoIntrospection;

    impl super::SqlConnection for NoIntrospection {
        fn query(&self, _: &str, _: &[Scalar]) -> Result<SqlQueryResult, IoError> {
            Ok(SqlQueryResult {
                columns: Vec::new(),
                rows: Vec::new(),
            })
        }

        fn execute_batch(&self, _: &str) -> Result<(), IoError> {
            Ok(())
        }

        fn table_exists(&self, _: &str) -> Result<bool, IoError> {
            Ok(false)
        }

        fn insert_rows(&self, _: &str, _: &[Vec<Scalar>]) -> Result<(), IoError> {
            Ok(())
        }

        fn dtype_sql(&self, _: DType) -> &'static str {
            "TEXT"
        }

        fn index_dtype_sql(&self, _: &Index) -> &'static str {
            "TEXT"
        }
    }

    let conn = NoIntrospection;

    let tables = list_sql_tables(&conn, None).unwrap();
    assert!(tables.is_empty());

    let with_schema = list_sql_tables(&conn, Some("any")).unwrap();
    assert!(with_schema.is_empty());
}
23183
/// `list_sql_tables` forwards its schema argument to the backend's
/// `list_tables` override and returns that result in the backend's order.
#[test]
fn list_sql_tables_routes_schema_to_backend_override() {
    /// Backend with a canned table list per schema.
    struct MultiSchema;

    impl super::SqlConnection for MultiSchema {
        fn query(&self, _: &str, _: &[Scalar]) -> Result<SqlQueryResult, IoError> {
            Ok(SqlQueryResult {
                columns: Vec::new(),
                rows: Vec::new(),
            })
        }

        fn execute_batch(&self, _: &str) -> Result<(), IoError> {
            Ok(())
        }

        fn table_exists(&self, _: &str) -> Result<bool, IoError> {
            Ok(false)
        }

        fn insert_rows(&self, _: &str, _: &[Vec<Scalar>]) -> Result<(), IoError> {
            Ok(())
        }

        fn dtype_sql(&self, _: DType) -> &'static str {
            "TEXT"
        }

        fn index_dtype_sql(&self, _: &Index) -> &'static str {
            "TEXT"
        }

        fn supports_schemas(&self) -> bool {
            true
        }

        fn list_tables(&self, schema: Option<&str>) -> Result<Vec<String>, IoError> {
            let names: &[&str] = match schema {
                None => &["public_table"],
                Some("analytics") => &["users", "events"],
                Some("audit") => &["logs"],
                Some(_) => &[],
            };
            Ok(names.iter().map(|name| (*name).to_owned()).collect())
        }
    }

    let conn = MultiSchema;

    // (schema argument, tables the backend is expected to report for it)
    let cases: [(Option<&str>, Vec<&str>); 4] = [
        (Some("analytics"), vec!["users", "events"]),
        (Some("audit"), vec!["logs"]),
        (Some("missing"), vec![]),
        (None, vec!["public_table"]),
    ];
    for (schema, expected) in cases {
        assert_eq!(list_sql_tables(&conn, schema).unwrap(), expected);
    }
}
23236
/// Looking up a table that was never created yields `Ok(None)`, not an error.
#[cfg(feature = "sql-sqlite")]
#[test]
fn sql_table_schema_unknown_table_returns_none() {
    let conn = make_sql_test_conn();
    let table = "no_such_table";
    let result = sql_table_schema(&conn, table, None).unwrap();
    assert!(result.is_none());
}
23246
/// A two-column table without constraints reports both columns in
/// declaration order, with their declared types, nullable and without a
/// primary-key ordinal on `id`.
#[cfg(feature = "sql-sqlite")]
#[test]
fn sql_table_schema_simple_table() {
    let conn = make_sql_test_conn();
    let ddl = "CREATE TABLE simple (id INTEGER, name TEXT);";
    super::SqlConnection::execute_batch(&conn, ddl).unwrap();

    let schema = sql_table_schema(&conn, "simple", None).unwrap().unwrap();
    assert_eq!(schema.table_name, "simple");
    assert_eq!(schema.columns.len(), 2);

    // First column: `id INTEGER`.
    let first = &schema.columns[0];
    assert_eq!(first.name, "id");
    assert_eq!(first.declared_type.as_deref(), Some("INTEGER"));
    assert!(schema.columns[0].nullable);
    assert!(schema.columns[0].primary_key_ordinal.is_none());

    // Second column: `name TEXT`.
    let second = &schema.columns[1];
    assert_eq!(second.name, "name");
    assert_eq!(second.declared_type.as_deref(), Some("TEXT"));
    assert!(schema.columns[1].nullable);
}
23264
/// PRIMARY KEY, NOT NULL and DEFAULT column attributes are surfaced through
/// `primary_key_ordinal`, `nullable` and `default_value` respectively.
#[cfg(feature = "sql-sqlite")]
#[test]
fn sql_table_schema_pk_notnull_default() {
    let conn = make_sql_test_conn();

    // Assembled piecewise; the resulting text is a single-line statement.
    let ddl = concat!(
        "CREATE TABLE meta ( ",
        "id INTEGER PRIMARY KEY, ",
        "name TEXT NOT NULL, ",
        "status TEXT DEFAULT 'active' ",
        ");",
    );
    super::SqlConnection::execute_batch(&conn, ddl).unwrap();

    let schema = sql_table_schema(&conn, "meta", None).unwrap().unwrap();
    assert_eq!(schema.columns.len(), 3);

    // `id`: the sole primary-key column, so its ordinal is 0.
    let id = schema.column("id").expect("id col");
    assert_eq!(id.primary_key_ordinal, Some(0));

    // `name`: NOT NULL, no default, not part of the key.
    let name = schema.column("name").expect("name col");
    assert!(!name.nullable);
    assert!(name.default_value.is_none());
    assert!(name.primary_key_ordinal.is_none());

    // `status`: nullable, default reported as the SQL literal text.
    let status = schema.column("status").expect("status col");
    assert!(status.nullable);
    let reported_default = status.default_value.as_deref();
    assert_eq!(
        reported_default,
        Some("'active'"),
        "expected SQL literal default text"
    );
}
23301
/// On the SQLite test connection a schema argument does not error and does
/// not change the table schema that is returned.
#[cfg(feature = "sql-sqlite")]
#[test]
fn sql_table_schema_schema_silently_ignored_on_sqlite() {
    let conn = make_sql_test_conn();
    super::SqlConnection::execute_batch(&conn, "CREATE TABLE only_one (x INTEGER);").unwrap();

    let qualified_lookup = sql_table_schema(&conn, "only_one", Some("ignored_on_sqlite"));
    let qualified = qualified_lookup
        .expect("schema arg must not error")
        .expect("table exists");

    let unqualified = sql_table_schema(&conn, "only_one", None).unwrap().unwrap();
    assert_eq!(qualified, unqualified);
}
23313
/// A table name carrying an injected statement is rejected with an
/// `IoError::Sql` whose message contains "invalid".
#[cfg(feature = "sql-sqlite")]
#[test]
fn sql_table_schema_rejects_invalid_table_name() {
    let conn = make_sql_test_conn();
    let hostile_name = "x; DROP TABLE users";
    let lookup = sql_table_schema(&conn, hostile_name, None);
    let err = lookup.expect_err("must reject");
    assert!(matches!(err, IoError::Sql(msg) if msg.contains("invalid")));
}
23323
/// A connection that does not override `table_schema` reports `None` for any
/// table name.
#[test]
fn sql_table_schema_default_impl_returns_none() {
    /// Connection that supplies no schema-introspection override of its own.
    struct NoIntrospection;

    impl super::SqlConnection for NoIntrospection {
        fn query(&self, _: &str, _: &[Scalar]) -> Result<SqlQueryResult, IoError> {
            Ok(SqlQueryResult {
                columns: Vec::new(),
                rows: Vec::new(),
            })
        }

        fn execute_batch(&self, _: &str) -> Result<(), IoError> {
            Ok(())
        }

        fn table_exists(&self, _: &str) -> Result<bool, IoError> {
            Ok(false)
        }

        fn insert_rows(&self, _: &str, _: &[Vec<Scalar>]) -> Result<(), IoError> {
            Ok(())
        }

        fn dtype_sql(&self, _: DType) -> &'static str {
            "TEXT"
        }

        fn index_dtype_sql(&self, _: &Index) -> &'static str {
            "TEXT"
        }
    }

    let conn = NoIntrospection;
    assert!(sql_table_schema(&conn, "anything", None).unwrap().is_none());
}
23353
/// `sql_table_schema` passes both the table name and the schema through to
/// the backend's `table_schema` override.
#[test]
fn sql_table_schema_routes_schema_to_backend_override() {
    /// Backend that knows exactly one table: `users` in schema `analytics`.
    struct MultiSchema;

    impl super::SqlConnection for MultiSchema {
        fn query(&self, _: &str, _: &[Scalar]) -> Result<SqlQueryResult, IoError> {
            Ok(SqlQueryResult {
                columns: Vec::new(),
                rows: Vec::new(),
            })
        }

        fn execute_batch(&self, _: &str) -> Result<(), IoError> {
            Ok(())
        }

        fn table_exists(&self, _: &str) -> Result<bool, IoError> {
            Ok(false)
        }

        fn insert_rows(&self, _: &str, _: &[Vec<Scalar>]) -> Result<(), IoError> {
            Ok(())
        }

        fn dtype_sql(&self, _: DType) -> &'static str {
            "TEXT"
        }

        fn index_dtype_sql(&self, _: &Index) -> &'static str {
            "TEXT"
        }

        fn supports_schemas(&self) -> bool {
            true
        }

        fn table_schema(
            &self,
            table: &str,
            schema: Option<&str>,
        ) -> Result<Option<SqlTableSchema>, IoError> {
            // Anything other than analytics.users is unknown to this backend.
            if table != "users" || schema != Some("analytics") {
                return Ok(None);
            }

            let id_column = SqlColumnSchema {
                name: "id".to_owned(),
                declared_type: Some("BIGINT".to_owned()),
                nullable: false,
                default_value: None,
                primary_key_ordinal: Some(0),
                comment: None,
                autoincrement: false,
            };
            Ok(Some(SqlTableSchema {
                table_name: "users".to_owned(),
                columns: vec![id_column],
            }))
        }
    }

    let conn = MultiSchema;

    // Matching table + schema: the canned definition comes back.
    let analytics_users = sql_table_schema(&conn, "users", Some("analytics"))
        .unwrap()
        .expect("found");
    let declared = analytics_users.columns[0].declared_type.as_deref();
    assert_eq!(declared, Some("BIGINT"));

    // Same table name under a different (or no) schema: not found.
    assert!(
        sql_table_schema(&conn, "users", Some("audit"))
            .unwrap()
            .is_none()
    );
    assert!(sql_table_schema(&conn, "users", None).unwrap().is_none());
}
23420
/// The SQLite test connection reports no schemas.
#[cfg(feature = "sql-sqlite")]
#[test]
fn list_sql_schemas_returns_empty_on_sqlite() {
    let conn = make_sql_test_conn();
    let listed = list_sql_schemas(&conn).unwrap();
    assert!(listed.is_empty(), "expected no schemas; got {listed:?}");
}
23432
/// A connection that does not override `list_schemas` reports an empty list.
#[test]
fn list_sql_schemas_default_impl_returns_empty() {
    /// Connection that supplies no schema-listing override of its own.
    struct NoIntrospection;

    impl super::SqlConnection for NoIntrospection {
        fn query(&self, _: &str, _: &[Scalar]) -> Result<SqlQueryResult, IoError> {
            Ok(SqlQueryResult {
                columns: Vec::new(),
                rows: Vec::new(),
            })
        }

        fn execute_batch(&self, _: &str) -> Result<(), IoError> {
            Ok(())
        }

        fn table_exists(&self, _: &str) -> Result<bool, IoError> {
            Ok(false)
        }

        fn insert_rows(&self, _: &str, _: &[Vec<Scalar>]) -> Result<(), IoError> {
            Ok(())
        }

        fn dtype_sql(&self, _: DType) -> &'static str {
            "TEXT"
        }

        fn index_dtype_sql(&self, _: &Index) -> &'static str {
            "TEXT"
        }
    }

    let conn = NoIntrospection;
    assert!(list_sql_schemas(&conn).unwrap().is_empty());
}
23462
/// `list_sql_schemas` returns exactly what the backend's `list_schemas`
/// override reports, in the backend's order.
#[test]
fn list_sql_schemas_routes_to_backend_override() {
    /// Backend with a fixed list of three schemas.
    struct MultiSchemaServer;

    impl super::SqlConnection for MultiSchemaServer {
        fn query(&self, _: &str, _: &[Scalar]) -> Result<SqlQueryResult, IoError> {
            Ok(SqlQueryResult {
                columns: Vec::new(),
                rows: Vec::new(),
            })
        }

        fn execute_batch(&self, _: &str) -> Result<(), IoError> {
            Ok(())
        }

        fn table_exists(&self, _: &str) -> Result<bool, IoError> {
            Ok(false)
        }

        fn insert_rows(&self, _: &str, _: &[Vec<Scalar>]) -> Result<(), IoError> {
            Ok(())
        }

        fn dtype_sql(&self, _: DType) -> &'static str {
            "TEXT"
        }

        fn index_dtype_sql(&self, _: &Index) -> &'static str {
            "TEXT"
        }

        fn supports_schemas(&self) -> bool {
            true
        }

        fn list_schemas(&self) -> Result<Vec<String>, IoError> {
            let canned = ["public", "analytics", "audit"];
            Ok(canned.iter().map(|name| (*name).to_owned()).collect())
        }
    }

    let conn = MultiSchemaServer;
    let schemas = list_sql_schemas(&conn).unwrap();
    assert_eq!(schemas, vec!["public", "analytics", "audit"]);

    // The stub reports no catalog-style schema names, so none may appear.
    assert!(!schemas.iter().any(|s| s.starts_with("pg_")));
    assert!(!schemas.iter().any(|s| s == "information_schema"));
}
23511
/// An error returned by the backend's `list_schemas` reaches the caller of
/// `list_sql_schemas` with its message intact.
#[test]
fn list_sql_schemas_propagates_backend_error() {
    /// Backend whose schema listing always fails.
    struct BrokenIntrospection;

    impl super::SqlConnection for BrokenIntrospection {
        fn query(&self, _: &str, _: &[Scalar]) -> Result<SqlQueryResult, IoError> {
            Ok(SqlQueryResult {
                columns: Vec::new(),
                rows: Vec::new(),
            })
        }

        fn execute_batch(&self, _: &str) -> Result<(), IoError> {
            Ok(())
        }

        fn table_exists(&self, _: &str) -> Result<bool, IoError> {
            Ok(false)
        }

        fn insert_rows(&self, _: &str, _: &[Vec<Scalar>]) -> Result<(), IoError> {
            Ok(())
        }

        fn dtype_sql(&self, _: DType) -> &'static str {
            "TEXT"
        }

        fn index_dtype_sql(&self, _: &Index) -> &'static str {
            "TEXT"
        }

        fn list_schemas(&self) -> Result<Vec<String>, IoError> {
            let failure = IoError::Sql("permission denied for catalog".to_owned());
            Err(failure)
        }
    }

    let outcome = list_sql_schemas(&BrokenIntrospection);
    let err = outcome.expect_err("should surface backend error");
    assert!(matches!(err, IoError::Sql(msg) if msg.contains("permission denied")));
}
23547
/// Truncating removes all rows (COUNT goes from 3 to 0) while the table
/// itself continues to exist.
#[cfg(feature = "sql-sqlite")]
#[test]
fn truncate_sql_table_clears_rows_but_preserves_schema() {
    let conn = make_sql_test_conn();

    let setup = [
        "CREATE TABLE rolling (id INTEGER, val TEXT);",
        "INSERT INTO rolling VALUES (1, 'a'), (2, 'b'), (3, 'c');",
    ];
    for sql in setup {
        super::SqlConnection::execute_batch(&conn, sql).unwrap();
    }

    // Runs the same COUNT query before and after the truncate.
    let count_rows =
        || super::SqlConnection::query(&conn, "SELECT COUNT(*) FROM rolling", &[]).unwrap();

    let before = count_rows();
    assert_eq!(before.rows[0][0], Scalar::Int64(3));

    truncate_sql_table(&conn, "rolling", None).unwrap();

    let after = count_rows();
    assert_eq!(after.rows[0][0], Scalar::Int64(0));
    assert!(super::SqlConnection::table_exists(&conn, "rolling").unwrap());
}
23574
/// On the SQLite test connection a schema argument does not prevent the
/// truncate: the call succeeds and the table ends up empty.
#[cfg(feature = "sql-sqlite")]
#[test]
fn truncate_sql_table_schema_silently_ignored_on_sqlite() {
    let conn = make_sql_test_conn();

    for sql in ["CREATE TABLE t (x INTEGER);", "INSERT INTO t VALUES (1);"] {
        super::SqlConnection::execute_batch(&conn, sql).unwrap();
    }

    let outcome = truncate_sql_table(&conn, "t", Some("ignored_on_sqlite"));
    outcome.expect("schema arg must not error on SQLite");

    let remaining = super::SqlConnection::query(&conn, "SELECT COUNT(*) FROM t", &[]).unwrap();
    assert_eq!(remaining.rows[0][0], Scalar::Int64(0));
}
23586
/// A table name carrying an injected statement is refused with an
/// `IoError::Sql` whose message contains "invalid".
#[cfg(feature = "sql-sqlite")]
#[test]
fn truncate_sql_table_rejects_invalid_table_name() {
    let conn = make_sql_test_conn();
    let hostile_name = "x; DROP TABLE users";
    let outcome = truncate_sql_table(&conn, hostile_name, None);
    let err = outcome.expect_err("must reject invalid identifier");
    assert!(matches!(err, IoError::Sql(msg) if msg.contains("invalid")));
}
23595
/// For a backend with `supports_schemas() == true` and no `truncate_table`
/// override, truncating issues one statement containing a schema-qualified,
/// double-quoted `DELETE FROM`.
#[test]
fn truncate_sql_table_routes_schema_to_quote_identifier() {
    use std::cell::RefCell;

    /// Schema-aware backend that records every `execute_batch` statement.
    struct PgLikeRecorder {
        statements: RefCell<Vec<String>>,
    }

    impl super::SqlConnection for PgLikeRecorder {
        fn query(&self, _: &str, _: &[Scalar]) -> Result<SqlQueryResult, IoError> {
            Ok(SqlQueryResult {
                columns: Vec::new(),
                rows: Vec::new(),
            })
        }

        fn execute_batch(&self, sql: &str) -> Result<(), IoError> {
            let mut log = self.statements.borrow_mut();
            log.push(sql.to_owned());
            Ok(())
        }

        fn table_exists(&self, _: &str) -> Result<bool, IoError> {
            Ok(false)
        }

        fn insert_rows(&self, _: &str, _: &[Vec<Scalar>]) -> Result<(), IoError> {
            Ok(())
        }

        fn dtype_sql(&self, _: DType) -> &'static str {
            "TEXT"
        }

        fn index_dtype_sql(&self, _: &Index) -> &'static str {
            "TEXT"
        }

        fn supports_schemas(&self) -> bool {
            true
        }
    }

    let conn = PgLikeRecorder {
        statements: RefCell::new(Vec::new()),
    };
    truncate_sql_table(&conn, "events", Some("analytics")).unwrap();

    let recorded = conn.statements.borrow();
    assert_eq!(recorded.len(), 1);

    let expected_fragment = "DELETE FROM \"analytics\".\"events\"";
    assert!(
        recorded[0].contains(expected_fragment),
        "expected schema-qualified DELETE; got: {}",
        recorded[0]
    );
}
23644
/// When a backend overrides `truncate_table`, `truncate_sql_table` uses that
/// override: exactly one statement is recorded and it starts with
/// `TRUNCATE TABLE`.
#[test]
fn truncate_sql_table_backend_override_uses_truncate_keyword() {
    use std::cell::RefCell;

    /// Backend that implements truncation with a `TRUNCATE TABLE` statement
    /// and records everything sent through `execute_batch`.
    struct FastTruncate {
        statements: RefCell<Vec<String>>,
    }

    impl super::SqlConnection for FastTruncate {
        fn query(&self, _: &str, _: &[Scalar]) -> Result<SqlQueryResult, IoError> {
            Ok(SqlQueryResult {
                columns: Vec::new(),
                rows: Vec::new(),
            })
        }

        fn execute_batch(&self, sql: &str) -> Result<(), IoError> {
            let mut log = self.statements.borrow_mut();
            log.push(sql.to_owned());
            Ok(())
        }

        fn table_exists(&self, _: &str) -> Result<bool, IoError> {
            Ok(false)
        }

        fn insert_rows(&self, _: &str, _: &[Vec<Scalar>]) -> Result<(), IoError> {
            Ok(())
        }

        fn dtype_sql(&self, _: DType) -> &'static str {
            "TEXT"
        }

        fn index_dtype_sql(&self, _: &Index) -> &'static str {
            "TEXT"
        }

        fn truncate_table(
            &self,
            table_name: &str,
            _schema: Option<&str>,
        ) -> Result<(), IoError> {
            let statement = format!("TRUNCATE TABLE \"{table_name}\"");
            self.execute_batch(&statement)
        }
    }

    let conn = FastTruncate {
        statements: RefCell::new(Vec::new()),
    };
    truncate_sql_table(&conn, "events", None).unwrap();

    let recorded = conn.statements.borrow();
    assert_eq!(recorded.len(), 1);

    let only_statement = &recorded[0];
    assert!(
        only_statement.starts_with("TRUNCATE TABLE"),
        "got: {}",
        only_statement
    );
}
23692
/// The SQLite test connection reports a dotted version string with at least
/// two components, each made up solely of ASCII digits.
#[cfg(feature = "sql-sqlite")]
#[test]
fn sql_server_version_returns_sqlite_version_string() {
    let conn = make_sql_test_conn();
    let reported = sql_server_version(&conn).unwrap();
    let version = reported.expect("SQLite reports version");

    let component_count = version.split('.').count();
    assert!(
        component_count >= 2,
        "expected dotted version; got: {version}"
    );

    for component in version.split('.') {
        // An empty component (e.g. from "3..1") is not numeric either.
        let is_numeric =
            !component.is_empty() && component.bytes().all(|byte| byte.is_ascii_digit());
        assert!(is_numeric, "expected numeric version parts; got {version}");
    }
}
23713
/// The version reported by the SQLite test connection begins with `3.`.
#[cfg(feature = "sql-sqlite")]
#[test]
fn sql_server_version_starts_with_three_for_sqlite_3_x() {
    let conn = make_sql_test_conn();
    let version = sql_server_version(&conn).unwrap().unwrap();
    let is_major_three = version.starts_with("3.");
    assert!(is_major_three, "expected SQLite 3.x; got {version}");
}
23726
/// A connection that does not override `server_version` reports `None`.
#[test]
fn sql_server_version_default_impl_returns_none() {
    /// Connection that supplies no version override of its own.
    struct NoIntrospection;

    impl super::SqlConnection for NoIntrospection {
        fn query(&self, _: &str, _: &[Scalar]) -> Result<SqlQueryResult, IoError> {
            Ok(SqlQueryResult {
                columns: Vec::new(),
                rows: Vec::new(),
            })
        }

        fn execute_batch(&self, _: &str) -> Result<(), IoError> {
            Ok(())
        }

        fn table_exists(&self, _: &str) -> Result<bool, IoError> {
            Ok(false)
        }

        fn insert_rows(&self, _: &str, _: &[Vec<Scalar>]) -> Result<(), IoError> {
            Ok(())
        }

        fn dtype_sql(&self, _: DType) -> &'static str {
            "TEXT"
        }

        fn index_dtype_sql(&self, _: &Index) -> &'static str {
            "TEXT"
        }
    }

    let conn = NoIntrospection;
    assert!(sql_server_version(&conn).unwrap().is_none());
}
23756
/// `sql_server_version` returns the string produced by the backend's
/// `server_version` override.
#[test]
fn sql_server_version_routes_to_backend_override() {
    /// Backend that always reports version "16.2".
    struct PgLikeStub;

    impl super::SqlConnection for PgLikeStub {
        fn query(&self, _: &str, _: &[Scalar]) -> Result<SqlQueryResult, IoError> {
            Ok(SqlQueryResult {
                columns: Vec::new(),
                rows: Vec::new(),
            })
        }

        fn execute_batch(&self, _: &str) -> Result<(), IoError> {
            Ok(())
        }

        fn table_exists(&self, _: &str) -> Result<bool, IoError> {
            Ok(false)
        }

        fn insert_rows(&self, _: &str, _: &[Vec<Scalar>]) -> Result<(), IoError> {
            Ok(())
        }

        fn dtype_sql(&self, _: DType) -> &'static str {
            "TEXT"
        }

        fn index_dtype_sql(&self, _: &Index) -> &'static str {
            "TEXT"
        }

        fn server_version(&self) -> Result<Option<String>, IoError> {
            let reported = String::from("16.2");
            Ok(Some(reported))
        }
    }

    let version = sql_server_version(&PgLikeStub).unwrap();
    assert_eq!(version.as_deref(), Some("16.2"));
}
23790
/// An error returned by the backend's `server_version` reaches the caller of
/// `sql_server_version` with its message intact.
#[test]
fn sql_server_version_propagates_backend_error() {
    /// Backend whose version lookup always fails.
    struct BrokenIntrospection;

    impl super::SqlConnection for BrokenIntrospection {
        fn query(&self, _: &str, _: &[Scalar]) -> Result<SqlQueryResult, IoError> {
            Ok(SqlQueryResult {
                columns: Vec::new(),
                rows: Vec::new(),
            })
        }

        fn execute_batch(&self, _: &str) -> Result<(), IoError> {
            Ok(())
        }

        fn table_exists(&self, _: &str) -> Result<bool, IoError> {
            Ok(false)
        }

        fn insert_rows(&self, _: &str, _: &[Vec<Scalar>]) -> Result<(), IoError> {
            Ok(())
        }

        fn dtype_sql(&self, _: DType) -> &'static str {
            "TEXT"
        }

        fn index_dtype_sql(&self, _: &Index) -> &'static str {
            "TEXT"
        }

        fn server_version(&self) -> Result<Option<String>, IoError> {
            let failure = IoError::Sql("connection lost".to_owned());
            Err(failure)
        }
    }

    let outcome = sql_server_version(&BrokenIntrospection);
    let err = outcome.expect_err("should surface backend error");
    assert!(matches!(err, IoError::Sql(msg) if msg.contains("connection lost")));
}
23824
/// Asking for the primary key of a table that was never created yields an
/// empty list rather than an error.
#[cfg(feature = "sql-sqlite")]
#[test]
fn sql_primary_key_columns_unknown_table_returns_empty() {
    let conn = make_sql_test_conn();
    let table = "no_such_table";
    let pk = sql_primary_key_columns(&conn, table, None).unwrap();
    assert!(pk.is_empty());
}
23835
/// A table declared without any PRIMARY KEY reports no key columns.
#[cfg(feature = "sql-sqlite")]
#[test]
fn sql_primary_key_columns_table_without_pk_returns_empty() {
    let conn = make_sql_test_conn();
    let ddl = "CREATE TABLE no_pk (a INTEGER, b TEXT);";
    super::SqlConnection::execute_batch(&conn, ddl).unwrap();

    let pk = sql_primary_key_columns(&conn, "no_pk", None).unwrap();
    assert!(pk.is_empty());
}
23845
/// A single-column PRIMARY KEY is reported as a one-element list.
#[cfg(feature = "sql-sqlite")]
#[test]
fn sql_primary_key_columns_single_pk() {
    let conn = make_sql_test_conn();
    let ddl = "CREATE TABLE single_pk (id INTEGER PRIMARY KEY, name TEXT);";
    super::SqlConnection::execute_batch(&conn, ddl).unwrap();

    let key_columns = sql_primary_key_columns(&conn, "single_pk", None).unwrap();
    assert_eq!(key_columns, vec!["id"]);
}
23858
/// A composite PRIMARY KEY is reported in the order given in the
/// `PRIMARY KEY (...)` clause; the non-key column `value` is excluded.
#[cfg(feature = "sql-sqlite")]
#[test]
fn sql_primary_key_columns_composite_pk_ordered_by_ordinal() {
    let conn = make_sql_test_conn();

    // Assembled piecewise; the resulting text is a single-line statement.
    let ddl = concat!(
        "CREATE TABLE composite ( ",
        "year INTEGER NOT NULL, ",
        "month INTEGER NOT NULL, ",
        "code TEXT NOT NULL, ",
        "value REAL, ",
        "PRIMARY KEY (year, month, code) ",
        ");",
    );
    super::SqlConnection::execute_batch(&conn, ddl).unwrap();

    let key_columns = sql_primary_key_columns(&conn, "composite", None).unwrap();
    assert_eq!(key_columns, vec!["year", "month", "code"]);
}
23878
// A backend that supplies only the six base `SqlConnection` methods and no
// introspection overrides must report an empty primary key.
#[test]
fn sql_primary_key_columns_default_impl_returns_empty_when_no_introspection() {
    // Inert stub: every method is a no-op returning a fixed value.
    struct BareBackend;

    impl super::SqlConnection for BareBackend {
        fn dtype_sql(&self, _kind: DType) -> &'static str {
            "TEXT"
        }
        fn index_dtype_sql(&self, _idx: &Index) -> &'static str {
            "TEXT"
        }
        fn table_exists(&self, _table: &str) -> Result<bool, IoError> {
            Ok(false)
        }
        fn execute_batch(&self, _statements: &str) -> Result<(), IoError> {
            Ok(())
        }
        fn insert_rows(&self, _statement: &str, _values: &[Vec<Scalar>]) -> Result<(), IoError> {
            Ok(())
        }
        fn query(&self, _text: &str, _params: &[Scalar]) -> Result<SqlQueryResult, IoError> {
            Ok(SqlQueryResult {
                columns: Vec::new(),
                rows: Vec::new(),
            })
        }
    }

    let backend = BareBackend;
    let key_columns = sql_primary_key_columns(&backend, "anything", None).unwrap();
    assert!(key_columns.is_empty());
}
23914
// `sql_primary_key_columns` must forward both the table and the schema to the
// backend's `table_schema` override, and order the key columns by their
// `primary_key_ordinal` rather than by their position in the column list.
#[test]
fn sql_primary_key_columns_routes_schema_to_table_schema_override() {
    // Stub that only knows `analytics.events`; every other lookup yields `None`.
    struct SchemaAwareBackend;

    impl super::SqlConnection for SchemaAwareBackend {
        fn supports_schemas(&self) -> bool {
            true
        }
        fn table_schema(
            &self,
            table: &str,
            schema: Option<&str>,
        ) -> Result<Option<SqlTableSchema>, IoError> {
            if table != "events" || schema != Some("analytics") {
                return Ok(None);
            }
            // Shared shape of every column row; only these four fields vary.
            let column = |name: &str, sql_type: &str, nullable: bool, pk_ordinal| SqlColumnSchema {
                name: name.to_owned(),
                declared_type: Some(sql_type.to_owned()),
                nullable,
                default_value: None,
                primary_key_ordinal: pk_ordinal,
                comment: None,
                autoincrement: false,
            };
            // Columns are deliberately listed out of key order.
            Ok(Some(SqlTableSchema {
                table_name: "events".to_owned(),
                columns: vec![
                    column("code", "TEXT", false, Some(2)),
                    column("year", "INTEGER", false, Some(0)),
                    column("value", "REAL", true, None),
                    column("month", "INTEGER", false, Some(1)),
                ],
            }))
        }
        fn dtype_sql(&self, _kind: DType) -> &'static str {
            "TEXT"
        }
        fn index_dtype_sql(&self, _idx: &Index) -> &'static str {
            "TEXT"
        }
        fn table_exists(&self, _table: &str) -> Result<bool, IoError> {
            Ok(false)
        }
        fn execute_batch(&self, _statements: &str) -> Result<(), IoError> {
            Ok(())
        }
        fn insert_rows(&self, _statement: &str, _values: &[Vec<Scalar>]) -> Result<(), IoError> {
            Ok(())
        }
        fn query(&self, _text: &str, _params: &[Scalar]) -> Result<SqlQueryResult, IoError> {
            Ok(SqlQueryResult {
                columns: Vec::new(),
                rows: Vec::new(),
            })
        }
    }

    let backend = SchemaAwareBackend;

    let expected = vec!["year", "month", "code"];
    let key_columns = sql_primary_key_columns(&backend, "events", Some("analytics")).unwrap();
    assert_eq!(key_columns, expected);

    // Same table name under a schema the stub does not know: nothing comes back.
    let other_schema = sql_primary_key_columns(&backend, "events", Some("audit")).unwrap();
    assert!(other_schema.is_empty());
}
24010
// The SQLite test connection advertises no identifier-length cap.
#[cfg(feature = "sql-sqlite")]
#[test]
fn sql_max_identifier_length_returns_none_on_sqlite() {
    let db = make_sql_test_conn();
    let limit = sql_max_identifier_length(&db);
    assert_eq!(limit, None);
}
24022
// Without a `max_identifier_length` override a backend reports no cap.
#[test]
fn sql_max_identifier_length_default_impl_returns_none() {
    // Inert stub providing only the six base methods.
    struct PlainBackend;

    impl super::SqlConnection for PlainBackend {
        fn dtype_sql(&self, _kind: DType) -> &'static str {
            "TEXT"
        }
        fn index_dtype_sql(&self, _idx: &Index) -> &'static str {
            "TEXT"
        }
        fn table_exists(&self, _table: &str) -> Result<bool, IoError> {
            Ok(false)
        }
        fn execute_batch(&self, _statements: &str) -> Result<(), IoError> {
            Ok(())
        }
        fn insert_rows(&self, _statement: &str, _values: &[Vec<Scalar>]) -> Result<(), IoError> {
            Ok(())
        }
        fn query(&self, _text: &str, _params: &[Scalar]) -> Result<SqlQueryResult, IoError> {
            Ok(SqlQueryResult {
                columns: Vec::new(),
                rows: Vec::new(),
            })
        }
    }

    let limit = sql_max_identifier_length(&PlainBackend);
    assert_eq!(limit, None);
}
24051
// A backend overriding `max_identifier_length` with 63 has that value
// surfaced unchanged by the free function.
#[test]
fn sql_max_identifier_length_pg_override_reports_63() {
    // Inert stub whose only meaningful answer is the 63-character cap.
    struct Cap63Backend;

    impl super::SqlConnection for Cap63Backend {
        fn max_identifier_length(&self) -> Option<usize> {
            Some(63)
        }
        fn dtype_sql(&self, _kind: DType) -> &'static str {
            "TEXT"
        }
        fn index_dtype_sql(&self, _idx: &Index) -> &'static str {
            "TEXT"
        }
        fn table_exists(&self, _table: &str) -> Result<bool, IoError> {
            Ok(false)
        }
        fn execute_batch(&self, _statements: &str) -> Result<(), IoError> {
            Ok(())
        }
        fn insert_rows(&self, _statement: &str, _values: &[Vec<Scalar>]) -> Result<(), IoError> {
            Ok(())
        }
        fn query(&self, _text: &str, _params: &[Scalar]) -> Result<SqlQueryResult, IoError> {
            Ok(SqlQueryResult {
                columns: Vec::new(),
                rows: Vec::new(),
            })
        }
    }

    let limit = sql_max_identifier_length(&Cap63Backend);
    assert_eq!(limit, Some(63));
}
24083
// A backend overriding `max_identifier_length` with 64 has that value
// surfaced unchanged by the free function.
#[test]
fn sql_max_identifier_length_mysql_override_reports_64() {
    // Inert stub whose only meaningful answer is the 64-character cap.
    struct Cap64Backend;

    impl super::SqlConnection for Cap64Backend {
        fn max_identifier_length(&self) -> Option<usize> {
            Some(64)
        }
        fn dtype_sql(&self, _kind: DType) -> &'static str {
            "TEXT"
        }
        fn index_dtype_sql(&self, _idx: &Index) -> &'static str {
            "TEXT"
        }
        fn table_exists(&self, _table: &str) -> Result<bool, IoError> {
            Ok(false)
        }
        fn execute_batch(&self, _statements: &str) -> Result<(), IoError> {
            Ok(())
        }
        fn insert_rows(&self, _statement: &str, _values: &[Vec<Scalar>]) -> Result<(), IoError> {
            Ok(())
        }
        fn query(&self, _text: &str, _params: &[Scalar]) -> Result<SqlQueryResult, IoError> {
            Ok(SqlQueryResult {
                columns: Vec::new(),
                rows: Vec::new(),
            })
        }
    }

    let limit = sql_max_identifier_length(&Cap64Backend);
    assert_eq!(limit, Some(64));
}
24115
// A backend overriding `max_identifier_length` with 128 has that value
// surfaced unchanged by the free function.
#[test]
fn sql_max_identifier_length_mssql_override_reports_128() {
    // Inert stub whose only meaningful answer is the 128-character cap.
    struct Cap128Backend;

    impl super::SqlConnection for Cap128Backend {
        fn max_identifier_length(&self) -> Option<usize> {
            Some(128)
        }
        fn dtype_sql(&self, _kind: DType) -> &'static str {
            "TEXT"
        }
        fn index_dtype_sql(&self, _idx: &Index) -> &'static str {
            "TEXT"
        }
        fn table_exists(&self, _table: &str) -> Result<bool, IoError> {
            Ok(false)
        }
        fn execute_batch(&self, _statements: &str) -> Result<(), IoError> {
            Ok(())
        }
        fn insert_rows(&self, _statement: &str, _values: &[Vec<Scalar>]) -> Result<(), IoError> {
            Ok(())
        }
        fn query(&self, _text: &str, _params: &[Scalar]) -> Result<SqlQueryResult, IoError> {
            Ok(SqlQueryResult {
                columns: Vec::new(),
                rows: Vec::new(),
            })
        }
    }

    let limit = sql_max_identifier_length(&Cap128Backend);
    assert_eq!(limit, Some(128));
}
24147
// Pins the capability snapshot of the SQLite test connection and checks that
// the standalone `sql_*` helpers agree with it.
#[cfg(feature = "sql-sqlite")]
#[test]
fn sql_backend_caps_sqlite_reports_param_and_row_caps() {
    let db = make_sql_test_conn();
    let limits = sql_backend_caps(&db).unwrap();

    // Snapshot fields.
    assert_eq!(limits.dialect_name, "sqlite");
    assert!(matches!(
        limits.server_version.as_deref(),
        Some(version) if version.starts_with("3.")
    ));
    assert!(limits.supports_returning);
    assert!(!limits.supports_schemas);
    assert_eq!(limits.max_param_count, Some(32766));
    assert_eq!(limits.max_identifier_length, None);

    // Row caps: the expected values equal 32766 divided by the column count,
    // rounded down; a zero column count yields no cap.
    assert_eq!(limits.max_insert_rows(3), Some(10922));
    assert_eq!(limits.max_insert_rows(0), None);

    // Free-function equivalents.
    assert_eq!(sql_max_param_count(&db), Some(32766));
    assert_eq!(sql_max_insert_rows(&db, 4), Some(8191));
    assert!(sql_supports_returning(&db));
    assert!(!sql_supports_schemas(&db));
}
24174
// `SqlInspector` must relay every capability override of the wrapped
// connection, both through its individual accessors and through the
// aggregated `backend_caps` snapshot.
#[test]
fn sql_inspector_backend_caps_pg_like_stub_reports_limits() {
    // Stub advertising a fixed set of capabilities; data methods are no-ops.
    struct CapsBackend;

    impl super::SqlConnection for CapsBackend {
        // Capability overrides under test.
        fn dialect_name(&self) -> &'static str {
            "postgresql"
        }
        fn server_version(&self) -> Result<Option<String>, IoError> {
            Ok(Some("16.3".to_owned()))
        }
        fn supports_returning(&self) -> bool {
            true
        }
        fn supports_schemas(&self) -> bool {
            true
        }
        fn max_param_count(&self) -> Option<usize> {
            Some(65535)
        }
        fn max_identifier_length(&self) -> Option<usize> {
            Some(63)
        }

        // Inert base methods.
        fn dtype_sql(&self, _kind: DType) -> &'static str {
            "TEXT"
        }
        fn index_dtype_sql(&self, _idx: &Index) -> &'static str {
            "TEXT"
        }
        fn table_exists(&self, _table: &str) -> Result<bool, IoError> {
            Ok(false)
        }
        fn execute_batch(&self, _statements: &str) -> Result<(), IoError> {
            Ok(())
        }
        fn insert_rows(&self, _statement: &str, _values: &[Vec<Scalar>]) -> Result<(), IoError> {
            Ok(())
        }
        fn query(&self, _text: &str, _params: &[Scalar]) -> Result<SqlQueryResult, IoError> {
            Ok(SqlQueryResult {
                columns: Vec::new(),
                rows: Vec::new(),
            })
        }
    }

    let backend = CapsBackend;
    let inspector = SqlInspector::new(&backend);
    let snapshot = inspector.backend_caps().unwrap();

    // Individual accessors.
    assert_eq!(inspector.dialect_name(), "postgresql");
    let version = inspector.server_version().unwrap();
    assert_eq!(version.as_deref(), Some("16.3"));
    assert!(inspector.supports_returning());
    assert!(inspector.supports_schemas());
    assert_eq!(inspector.max_param_count(), Some(65535));
    assert_eq!(inspector.max_identifier_length(), Some(63));

    // Expected row cap equals 65535 divided by 4 columns, rounded down.
    assert_eq!(inspector.max_insert_rows(4), Some(16383));
    assert_eq!(snapshot.max_insert_rows(4), Some(16383));

    // Aggregated snapshot, compared field-for-field.
    let expected = SqlBackendCaps {
        dialect_name: "postgresql",
        server_version: Some("16.3".to_owned()),
        supports_returning: true,
        supports_schemas: true,
        max_param_count: Some(65535),
        max_identifier_length: Some(63),
    };
    assert_eq!(snapshot, expected);
}
24244
// Writing a frame whose single column name is 80 characters long must
// succeed against the SQLite test connection.
#[cfg(feature = "sql-sqlite")]
#[test]
fn write_sql_long_column_name_succeeds_on_sqlite() {
    let db = make_sql_test_conn();

    let wide_name = "a".repeat(80);
    let column = wide_name.as_str();
    let frame =
        fp_frame::DataFrame::from_dict(&[column], vec![(column, vec![Scalar::Int64(1)])]).unwrap();

    let options = SqlWriteOptions {
        if_exists: SqlIfExists::Fail,
        index: false,
        index_label: None,
        schema: None,
        dtype: None,
        method: SqlInsertMethod::Single,
        chunksize: None,
    };
    write_sql_with_options(&frame, &db, "long_col_tbl", &options)
        .expect("SQLite has no identifier limit");
}
24275
24276 fn make_pg_like_recorder() -> impl super::SqlConnection + 'static {
24277 struct PgLikeLimit;
24281 impl super::SqlConnection for PgLikeLimit {
24282 fn query(&self, _q: &str, _p: &[Scalar]) -> Result<SqlQueryResult, IoError> {
24283 Ok(SqlQueryResult {
24284 columns: vec![],
24285 rows: vec![],
24286 })
24287 }
24288 fn execute_batch(&self, _sql: &str) -> Result<(), IoError> {
24289 Ok(())
24290 }
24291 fn table_exists(&self, _name: &str) -> Result<bool, IoError> {
24292 Ok(false)
24293 }
24294 fn insert_rows(&self, _sql: &str, _rows: &[Vec<Scalar>]) -> Result<(), IoError> {
24295 Ok(())
24296 }
24297 fn dtype_sql(&self, _dtype: DType) -> &'static str {
24298 "TEXT"
24299 }
24300 fn index_dtype_sql(&self, _index: &Index) -> &'static str {
24301 "TEXT"
24302 }
24303 fn max_identifier_length(&self) -> Option<usize> {
24304 Some(63)
24305 }
24306 fn supports_schemas(&self) -> bool {
24307 true
24308 }
24309 }
24310 PgLikeLimit
24311 }
24312
// A 64-character column name is one over the stub's 63-character cap, so the
// write must fail with an `IoError::Sql` naming the column and the limit.
#[test]
fn write_sql_rejects_long_column_name_on_pg_like_backend() {
    let backend = make_pg_like_recorder();

    let wide_name = "c".repeat(64);
    let column = wide_name.as_str();
    let frame =
        fp_frame::DataFrame::from_dict(&[column], vec![(column, vec![Scalar::Int64(1)])]).unwrap();

    let options = SqlWriteOptions {
        if_exists: SqlIfExists::Fail,
        index: false,
        index_label: None,
        schema: None,
        dtype: None,
        method: SqlInsertMethod::Single,
        chunksize: None,
    };
    let failure = write_sql_with_options(&frame, &backend, "ok_tbl", &options)
        .expect_err("64-char column must exceed PG limit");

    assert!(matches!(
        failure,
        IoError::Sql(text) if text.contains("column") && text.contains("63")
    ));
}
24339
// A 64-character table name is one over the stub's 63-character cap, so the
// write must fail with an `IoError::Sql` naming the table and the limit.
#[test]
fn write_sql_rejects_long_table_name_on_pg_like_backend() {
    let backend = make_pg_like_recorder();
    let wide_table = "t".repeat(64);
    let frame =
        fp_frame::DataFrame::from_dict(&["x"], vec![("x", vec![Scalar::Int64(1)])]).unwrap();

    let options = SqlWriteOptions {
        if_exists: SqlIfExists::Fail,
        index: false,
        index_label: None,
        schema: None,
        dtype: None,
        method: SqlInsertMethod::Single,
        chunksize: None,
    };
    let failure = write_sql_with_options(&frame, &backend, &wide_table, &options)
        .expect_err("64-char table must exceed PG limit");

    assert!(matches!(
        failure,
        IoError::Sql(text) if text.contains("table") && text.contains("63")
    ));
}
24365
// With `index: true`, a 64-character index label is one over the stub's
// 63-character cap; the write must fail with an `IoError::Sql` mentioning
// "index label" and the limit.
#[test]
fn write_sql_rejects_long_index_label_on_pg_like_backend() {
    let backend = make_pg_like_recorder();
    let wide_label = "i".repeat(64);
    let frame =
        fp_frame::DataFrame::from_dict(&["x"], vec![("x", vec![Scalar::Int64(1)])]).unwrap();

    let options = SqlWriteOptions {
        if_exists: SqlIfExists::Fail,
        index: true,
        index_label: Some(wide_label),
        schema: None,
        dtype: None,
        method: SqlInsertMethod::Single,
        chunksize: None,
    };
    let failure = write_sql_with_options(&frame, &backend, "ok_tbl", &options)
        .expect_err("64-char index label must exceed PG limit");

    assert!(matches!(
        failure,
        IoError::Sql(text) if text.contains("index label") && text.contains("63")
    ));
}
24391
// A 64-character schema name is one over the stub's 63-character cap, so the
// write must fail with an `IoError::Sql` naming the schema and the limit.
#[test]
fn write_sql_rejects_long_schema_name_on_pg_like_backend() {
    let backend = make_pg_like_recorder();
    let wide_schema = "s".repeat(64);
    let frame =
        fp_frame::DataFrame::from_dict(&["x"], vec![("x", vec![Scalar::Int64(1)])]).unwrap();

    let options = SqlWriteOptions {
        if_exists: SqlIfExists::Fail,
        index: false,
        index_label: None,
        schema: Some(wide_schema),
        dtype: None,
        method: SqlInsertMethod::Single,
        chunksize: None,
    };
    let failure = write_sql_with_options(&frame, &backend, "ok_tbl", &options)
        .expect_err("64-char schema must exceed PG limit");

    assert!(matches!(
        failure,
        IoError::Sql(text) if text.contains("schema") && text.contains("63")
    ));
}
24415
// A column name of exactly 63 characters sits on the stub's cap and must be
// accepted: the limit is inclusive.
#[test]
fn write_sql_just_at_the_boundary_is_accepted() {
    let backend = make_pg_like_recorder();

    let boundary_name = "a".repeat(63);
    let column = boundary_name.as_str();
    let frame =
        fp_frame::DataFrame::from_dict(&[column], vec![(column, vec![Scalar::Int64(1)])]).unwrap();

    let options = SqlWriteOptions {
        if_exists: SqlIfExists::Fail,
        index: false,
        index_label: None,
        schema: None,
        dtype: None,
        method: SqlInsertMethod::Single,
        chunksize: None,
    };
    write_sql_with_options(&frame, &backend, "ok_tbl", &options)
        .expect("63-char column at boundary should be accepted");
}
24442
// Listing indexes for a table name this test never creates must succeed and
// yield nothing.
#[cfg(feature = "sql-sqlite")]
#[test]
fn list_sql_indexes_unknown_table_returns_empty() {
    let db = make_sql_test_conn();
    let found = list_sql_indexes(&db, "no_such_tbl", None);
    assert!(found.unwrap().is_empty());
}
24452
// A freshly created table with no CREATE INDEX statements lists no indexes.
#[cfg(feature = "sql-sqlite")]
#[test]
fn list_sql_indexes_table_without_indexes() {
    let db = make_sql_test_conn();
    let ddl = "CREATE TABLE plain (a INTEGER, b TEXT);";
    super::SqlConnection::execute_batch(&db, ddl).unwrap();

    let found = list_sql_indexes(&db, "plain", None).unwrap();
    assert!(found.is_empty());
}
24462
// One plain single-column index is reported with its name, its column and
// `unique == false`.
#[cfg(feature = "sql-sqlite")]
#[test]
fn list_sql_indexes_single_column() {
    let db = make_sql_test_conn();
    for ddl in [
        "CREATE TABLE events (id INTEGER, ts TEXT);",
        "CREATE INDEX idx_events_ts ON events (ts);",
    ] {
        super::SqlConnection::execute_batch(&db, ddl).unwrap();
    }

    let found = list_sql_indexes(&db, "events", None).unwrap();
    assert_eq!(found.len(), 1);
    let only = &found[0];
    assert_eq!(only.name, "idx_events_ts");
    assert_eq!(only.columns, vec!["ts"]);
    assert!(!only.unique);
}
24477
// A `CREATE UNIQUE INDEX` is reported with `unique == true`.
#[cfg(feature = "sql-sqlite")]
#[test]
fn list_sql_indexes_unique_index() {
    let db = make_sql_test_conn();
    for ddl in [
        "CREATE TABLE users (id INTEGER, email TEXT);",
        "CREATE UNIQUE INDEX idx_users_email ON users (email);",
    ] {
        super::SqlConnection::execute_batch(&db, ddl).unwrap();
    }

    let found = list_sql_indexes(&db, "users", None).unwrap();
    assert_eq!(found.len(), 1);
    let only = &found[0];
    assert_eq!(only.name, "idx_users_email");
    assert_eq!(only.columns, vec!["email"]);
    assert!(only.unique);
}
24495
// A multi-column index reports its columns in the order they appear in the
// CREATE INDEX statement.
#[cfg(feature = "sql-sqlite")]
#[test]
fn list_sql_indexes_composite_columns_in_definition_order() {
    let db = make_sql_test_conn();
    for ddl in [
        "CREATE TABLE rolling (year INT, month INT, code TEXT, val REAL);",
        "CREATE INDEX idx_rolling_y_m_c ON rolling (year, month, code);",
    ] {
        super::SqlConnection::execute_batch(&db, ddl).unwrap();
    }

    let found = list_sql_indexes(&db, "rolling", None).unwrap();
    assert_eq!(found.len(), 1);
    let expected_columns = vec!["year", "month", "code"];
    assert_eq!(found[0].columns, expected_columns);
}
24514
// On a table with a primary key plus one explicitly created index, only the
// explicit index may be listed. Going by the test name, anything the backend
// maintains automatically for the primary key is expected to be filtered out.
#[cfg(feature = "sql-sqlite")]
#[test]
fn list_sql_indexes_filters_pk_auto_index() {
    let db = make_sql_test_conn();
    for ddl in [
        "CREATE TABLE pk_only (id INTEGER PRIMARY KEY, name TEXT);",
        "CREATE INDEX idx_pk_only_name ON pk_only (name);",
    ] {
        super::SqlConnection::execute_batch(&db, ddl).unwrap();
    }

    let found = list_sql_indexes(&db, "pk_only", None).unwrap();
    assert_eq!(found.len(), 1);
    assert_eq!(found[0].name, "idx_pk_only_name");
}
24537
// A table argument carrying an injected statement must be refused with an
// `IoError::Sql` whose message contains "invalid".
#[cfg(feature = "sql-sqlite")]
#[test]
fn list_sql_indexes_rejects_invalid_table_name() {
    let db = make_sql_test_conn();
    let hostile_name = "x; DROP TABLE users";
    let failure =
        list_sql_indexes(&db, hostile_name, None).expect_err("must reject invalid identifier");
    assert!(matches!(failure, IoError::Sql(text) if text.contains("invalid")));
}
24546
// A backend without a `list_indexes` override reports no indexes.
#[test]
fn list_sql_indexes_default_impl_returns_empty() {
    // Inert stub providing only the six base methods.
    struct BareBackend;

    impl super::SqlConnection for BareBackend {
        fn dtype_sql(&self, _kind: DType) -> &'static str {
            "TEXT"
        }
        fn index_dtype_sql(&self, _idx: &Index) -> &'static str {
            "TEXT"
        }
        fn table_exists(&self, _table: &str) -> Result<bool, IoError> {
            Ok(false)
        }
        fn execute_batch(&self, _statements: &str) -> Result<(), IoError> {
            Ok(())
        }
        fn insert_rows(&self, _statement: &str, _values: &[Vec<Scalar>]) -> Result<(), IoError> {
            Ok(())
        }
        fn query(&self, _text: &str, _params: &[Scalar]) -> Result<SqlQueryResult, IoError> {
            Ok(SqlQueryResult {
                columns: Vec::new(),
                rows: Vec::new(),
            })
        }
    }

    let found = list_sql_indexes(&BareBackend, "anything", None).unwrap();
    assert!(found.is_empty());
}
24579
// `list_sql_indexes` must hand both the table and the schema to the
// backend's `list_indexes` override and return its answer.
#[test]
fn list_sql_indexes_routes_to_backend_override() {
    // Stub that only knows indexes for `analytics.events`.
    struct SchemaAwareBackend;

    impl super::SqlConnection for SchemaAwareBackend {
        fn supports_schemas(&self) -> bool {
            true
        }
        fn list_indexes(
            &self,
            table: &str,
            schema: Option<&str>,
        ) -> Result<Vec<SqlIndexSchema>, IoError> {
            match (table, schema) {
                ("events", Some("analytics")) => Ok(vec![
                    SqlIndexSchema {
                        name: "idx_events_ts".to_owned(),
                        columns: vec!["ts".to_owned()],
                        unique: false,
                    },
                    SqlIndexSchema {
                        name: "uq_events_uid".to_owned(),
                        columns: vec!["user_id".to_owned()],
                        unique: true,
                    },
                ]),
                _ => Ok(Vec::new()),
            }
        }
        fn dtype_sql(&self, _kind: DType) -> &'static str {
            "TEXT"
        }
        fn index_dtype_sql(&self, _idx: &Index) -> &'static str {
            "TEXT"
        }
        fn table_exists(&self, _table: &str) -> Result<bool, IoError> {
            Ok(false)
        }
        fn execute_batch(&self, _statements: &str) -> Result<(), IoError> {
            Ok(())
        }
        fn insert_rows(&self, _statement: &str, _values: &[Vec<Scalar>]) -> Result<(), IoError> {
            Ok(())
        }
        fn query(&self, _text: &str, _params: &[Scalar]) -> Result<SqlQueryResult, IoError> {
            Ok(SqlQueryResult {
                columns: Vec::new(),
                rows: Vec::new(),
            })
        }
    }

    let backend = SchemaAwareBackend;

    let found = list_sql_indexes(&backend, "events", Some("analytics")).unwrap();
    assert_eq!(found.len(), 2);
    let has_unique_uid = found
        .iter()
        .any(|entry| entry.unique && entry.name == "uq_events_uid");
    assert!(has_unique_uid);

    // Same table name under a schema the stub does not know: nothing comes back.
    let other_schema = list_sql_indexes(&backend, "events", Some("audit")).unwrap();
    assert!(other_schema.is_empty());
}
24646
// Listing foreign keys for a table name this test never creates must succeed
// and yield nothing.
#[cfg(feature = "sql-sqlite")]
#[test]
fn list_sql_foreign_keys_unknown_table_returns_empty() {
    let db = make_sql_test_conn();
    let found = list_sql_foreign_keys(&db, "no_such_tbl", None);
    assert!(found.unwrap().is_empty());
}
24657
// A table declared without FOREIGN KEY clauses lists no foreign keys.
#[cfg(feature = "sql-sqlite")]
#[test]
fn list_sql_foreign_keys_table_without_fk() {
    let db = make_sql_test_conn();
    let ddl = "CREATE TABLE plain (a INTEGER, b TEXT);";
    super::SqlConnection::execute_batch(&db, ddl).unwrap();

    let found = list_sql_foreign_keys(&db, "plain", None).unwrap();
    assert!(found.is_empty());
}
24667
// A single-column, unnamed FOREIGN KEY is reported with its local column,
// referenced table and column, and no constraint name.
#[cfg(feature = "sql-sqlite")]
#[test]
fn list_sql_foreign_keys_single_column_fk() {
    let db = make_sql_test_conn();
    for ddl in [
        "CREATE TABLE parent (id INTEGER PRIMARY KEY, label TEXT);",
        "CREATE TABLE child (cid INTEGER, parent_id INTEGER, \
         FOREIGN KEY (parent_id) REFERENCES parent(id));",
    ] {
        super::SqlConnection::execute_batch(&db, ddl).unwrap();
    }

    let found = list_sql_foreign_keys(&db, "child", None).unwrap();
    assert_eq!(found.len(), 1);
    let only = &found[0];
    assert_eq!(only.columns, vec!["parent_id"]);
    assert_eq!(only.referenced_table, "parent");
    assert_eq!(only.referenced_columns, vec!["id"]);
    assert!(only.constraint_name.is_none());
}
24691
// A three-column FOREIGN KEY must be reported as one entry whose local and
// referenced column lists both keep the declared order.
#[cfg(feature = "sql-sqlite")]
#[test]
fn list_sql_foreign_keys_composite_fk_ordered_by_seq() {
    let db = make_sql_test_conn();
    for ddl in [
        "CREATE TABLE rolling ( \
         year INTEGER NOT NULL, \
         month INTEGER NOT NULL, \
         code TEXT NOT NULL, \
         PRIMARY KEY (year, month, code) \
         );",
        "CREATE TABLE rolling_fact ( \
         fact_id INTEGER, year INTEGER, month INTEGER, code TEXT, \
         FOREIGN KEY (year, month, code) \
         REFERENCES rolling(year, month, code) \
         );",
    ] {
        super::SqlConnection::execute_batch(&db, ddl).unwrap();
    }

    let found = list_sql_foreign_keys(&db, "rolling_fact", None).unwrap();
    assert_eq!(found.len(), 1);
    let only = &found[0];
    assert_eq!(only.columns, vec!["year", "month", "code"]);
    assert_eq!(only.referenced_columns, vec!["year", "month", "code"]);
    assert_eq!(only.referenced_table, "rolling");
}
24722
// Two independent FOREIGN KEY clauses on one table yield two entries. They
// are looked up by referenced table, so the test does not depend on the order
// in which the backend returns them.
#[cfg(feature = "sql-sqlite")]
#[test]
fn list_sql_foreign_keys_multiple_fks_on_one_table() {
    let db = make_sql_test_conn();
    for ddl in [
        "CREATE TABLE users (id INTEGER PRIMARY KEY);",
        "CREATE TABLE products (sku TEXT PRIMARY KEY);",
        "CREATE TABLE orders ( \
         oid INTEGER, \
         user_id INTEGER, \
         product_sku TEXT, \
         FOREIGN KEY (user_id) REFERENCES users(id), \
         FOREIGN KEY (product_sku) REFERENCES products(sku) \
         );",
    ] {
        super::SqlConnection::execute_batch(&db, ddl).unwrap();
    }

    let found = list_sql_foreign_keys(&db, "orders", None).unwrap();
    assert_eq!(found.len(), 2);

    let to_users = found
        .iter()
        .find(|entry| entry.referenced_table == "users")
        .unwrap();
    assert_eq!(to_users.columns, vec!["user_id"]);
    assert_eq!(to_users.referenced_columns, vec!["id"]);

    let to_products = found
        .iter()
        .find(|entry| entry.referenced_table == "products")
        .unwrap();
    assert_eq!(to_products.columns, vec!["product_sku"]);
    assert_eq!(to_products.referenced_columns, vec!["sku"]);
}
24754
// A table argument carrying an injected statement must be refused with an
// `IoError::Sql` whose message contains "invalid".
#[cfg(feature = "sql-sqlite")]
#[test]
fn list_sql_foreign_keys_rejects_invalid_table_name() {
    let db = make_sql_test_conn();
    let hostile_name = "x; DROP TABLE users";
    let failure =
        list_sql_foreign_keys(&db, hostile_name, None).expect_err("must reject invalid identifier");
    assert!(matches!(failure, IoError::Sql(text) if text.contains("invalid")));
}
24763
// When the FOREIGN KEY clause names only the parent table (`REFERENCES
// imp_parent`, no column list), the referenced column must be resolved to the
// parent's primary key (`pid`) and the foreign key must still be listed.
#[cfg(feature = "sql-sqlite")]
#[test]
fn list_sql_foreign_keys_resolves_implicit_pk_single_column() {
    let db = make_sql_test_conn();
    for ddl in [
        "CREATE TABLE imp_parent (pid INTEGER PRIMARY KEY, label TEXT);",
        "CREATE TABLE imp_child ( \
         cid INTEGER, \
         parent_id INTEGER, \
         FOREIGN KEY (parent_id) REFERENCES imp_parent \
         );",
    ] {
        super::SqlConnection::execute_batch(&db, ddl).unwrap();
    }

    let found = list_sql_foreign_keys(&db, "imp_child", None).unwrap();
    assert_eq!(
        found.len(),
        1,
        "implicit-PK FK must surface (was being silently dropped)"
    );
    let only = &found[0];
    assert_eq!(only.columns, vec!["parent_id"]);
    assert_eq!(only.referenced_table, "imp_parent");
    assert_eq!(only.referenced_columns, vec!["pid"]);
}
24797
// Composite variant of the implicit-reference case: `REFERENCES
// imp_parent_comp` without a column list must resolve to the parent's
// two-column primary key, in key order.
#[cfg(feature = "sql-sqlite")]
#[test]
fn list_sql_foreign_keys_resolves_implicit_pk_composite() {
    let db = make_sql_test_conn();
    for ddl in [
        "CREATE TABLE imp_parent_comp ( \
         year INTEGER NOT NULL, \
         month INTEGER NOT NULL, \
         PRIMARY KEY (year, month) \
         );",
        "CREATE TABLE imp_child_comp ( \
         cid INTEGER, \
         fyear INTEGER NOT NULL, \
         fmonth INTEGER NOT NULL, \
         FOREIGN KEY (fyear, fmonth) REFERENCES imp_parent_comp \
         );",
    ] {
        super::SqlConnection::execute_batch(&db, ddl).unwrap();
    }

    let found = list_sql_foreign_keys(&db, "imp_child_comp", None).unwrap();
    assert_eq!(found.len(), 1);
    let only = &found[0];
    assert_eq!(only.columns, vec!["fyear", "fmonth"]);
    assert_eq!(only.referenced_table, "imp_parent_comp");
    assert_eq!(only.referenced_columns, vec!["year", "month"]);
}
24829
// Control case for the implicit-reference tests: when the referenced column
// is spelled out (`REFERENCES exp_parent(pid)`), it is reported as written.
#[cfg(feature = "sql-sqlite")]
#[test]
fn list_sql_foreign_keys_explicit_columns_unchanged() {
    let db = make_sql_test_conn();
    for ddl in [
        "CREATE TABLE exp_parent (pid INTEGER PRIMARY KEY);",
        "CREATE TABLE exp_child ( \
         cid INTEGER, \
         parent_id INTEGER, \
         FOREIGN KEY (parent_id) REFERENCES exp_parent(pid) \
         );",
    ] {
        super::SqlConnection::execute_batch(&db, ddl).unwrap();
    }

    let found = list_sql_foreign_keys(&db, "exp_child", None).unwrap();
    assert_eq!(found.len(), 1);
    let only = &found[0];
    assert_eq!(only.columns, vec!["parent_id"]);
    assert_eq!(only.referenced_columns, vec!["pid"]);
}
24855
// A backend without a `list_foreign_keys` override reports no foreign keys.
#[test]
fn list_sql_foreign_keys_default_impl_returns_empty() {
    // Inert stub providing only the six base methods.
    struct BareBackend;

    impl super::SqlConnection for BareBackend {
        fn dtype_sql(&self, _kind: DType) -> &'static str {
            "TEXT"
        }
        fn index_dtype_sql(&self, _idx: &Index) -> &'static str {
            "TEXT"
        }
        fn table_exists(&self, _table: &str) -> Result<bool, IoError> {
            Ok(false)
        }
        fn execute_batch(&self, _statements: &str) -> Result<(), IoError> {
            Ok(())
        }
        fn insert_rows(&self, _statement: &str, _values: &[Vec<Scalar>]) -> Result<(), IoError> {
            Ok(())
        }
        fn query(&self, _text: &str, _params: &[Scalar]) -> Result<SqlQueryResult, IoError> {
            Ok(SqlQueryResult {
                columns: Vec::new(),
                rows: Vec::new(),
            })
        }
    }

    let found = list_sql_foreign_keys(&BareBackend, "anything", None).unwrap();
    assert!(found.is_empty());
}
24888
// `list_sql_foreign_keys` must hand both the table and the schema to the
// backend's `list_foreign_keys` override and return its answer, including a
// constraint name when the backend supplies one.
#[test]
fn list_sql_foreign_keys_routes_to_backend_override() {
    // Stub that only knows a foreign key for `sales.orders`.
    struct SchemaAwareBackend;

    impl super::SqlConnection for SchemaAwareBackend {
        fn supports_schemas(&self) -> bool {
            true
        }
        fn list_foreign_keys(
            &self,
            table: &str,
            schema: Option<&str>,
        ) -> Result<Vec<SqlForeignKeySchema>, IoError> {
            match (table, schema) {
                ("orders", Some("sales")) => Ok(vec![SqlForeignKeySchema {
                    constraint_name: Some("orders_user_fk".to_owned()),
                    columns: vec!["user_id".to_owned()],
                    referenced_table: "users".to_owned(),
                    referenced_columns: vec!["id".to_owned()],
                }]),
                _ => Ok(Vec::new()),
            }
        }
        fn dtype_sql(&self, _kind: DType) -> &'static str {
            "TEXT"
        }
        fn index_dtype_sql(&self, _idx: &Index) -> &'static str {
            "TEXT"
        }
        fn table_exists(&self, _table: &str) -> Result<bool, IoError> {
            Ok(false)
        }
        fn execute_batch(&self, _statements: &str) -> Result<(), IoError> {
            Ok(())
        }
        fn insert_rows(&self, _statement: &str, _values: &[Vec<Scalar>]) -> Result<(), IoError> {
            Ok(())
        }
        fn query(&self, _text: &str, _params: &[Scalar]) -> Result<SqlQueryResult, IoError> {
            Ok(SqlQueryResult {
                columns: Vec::new(),
                rows: Vec::new(),
            })
        }
    }

    let backend = SchemaAwareBackend;

    let found = list_sql_foreign_keys(&backend, "orders", Some("sales")).unwrap();
    assert_eq!(found.len(), 1);
    let only = &found[0];
    assert_eq!(only.constraint_name.as_deref(), Some("orders_user_fk"));
    assert_eq!(only.referenced_table, "users");

    // Same table name under a schema the stub does not know: nothing comes back.
    let other_schema = list_sql_foreign_keys(&backend, "orders", Some("audit")).unwrap();
    assert!(other_schema.is_empty());
}
24946
/// With no view created on the test connection, `list_sql_views` yields an
/// empty list.
#[cfg(feature = "sql-sqlite")]
#[test]
fn list_sql_views_empty_db_returns_empty() {
    let db = make_sql_test_conn();
    let found = list_sql_views(&db, None).unwrap();
    assert!(found.is_empty());
}
24956
/// Views created in the order zebra → alpha are expected back in the order
/// alpha → zebra.
#[cfg(feature = "sql-sqlite")]
#[test]
fn list_sql_views_returns_user_views_sorted() {
    let db = make_sql_test_conn();
    // One base table plus two views, deliberately created out of name order.
    for ddl in [
        "CREATE TABLE base (id INTEGER, val TEXT);",
        "CREATE VIEW zebra_view AS SELECT id FROM base;",
        "CREATE VIEW alpha_view AS SELECT val FROM base;",
    ] {
        super::SqlConnection::execute_batch(&db, ddl).unwrap();
    }
    let listed = list_sql_views(&db, None).unwrap();
    assert_eq!(listed, vec!["alpha_view", "zebra_view"]);
}
24976
/// Tables and views are reported by their own listing functions and never
/// leak into each other's results.
#[cfg(feature = "sql-sqlite")]
#[test]
fn list_sql_views_separated_from_list_tables() {
    let db = make_sql_test_conn();
    for ddl in [
        "CREATE TABLE just_tbl (x INTEGER);",
        "CREATE VIEW just_view AS SELECT x FROM just_tbl;",
    ] {
        super::SqlConnection::execute_batch(&db, ddl).unwrap();
    }

    let table_names = list_sql_tables(&db, None).unwrap();
    let view_names = list_sql_views(&db, None).unwrap();

    // Positive expectations first ...
    assert_eq!(table_names, vec!["just_tbl"]);
    assert_eq!(view_names, vec!["just_view"]);
    // ... then the explicit cross-contamination checks.
    let view_as_table = String::from("just_view");
    let table_as_view = String::from("just_tbl");
    assert!(!table_names.contains(&view_as_table));
    assert!(!view_names.contains(&table_as_view));
}
24997
/// Passing a schema name on the SQLite test connection must neither fail nor
/// change the listing compared with passing `None`.
#[cfg(feature = "sql-sqlite")]
#[test]
fn list_sql_views_schema_silently_ignored_on_sqlite() {
    let db = make_sql_test_conn();
    for ddl in [
        "CREATE TABLE t (x INTEGER);",
        "CREATE VIEW v AS SELECT x FROM t;",
    ] {
        super::SqlConnection::execute_batch(&db, ddl).unwrap();
    }
    let scoped = list_sql_views(&db, Some("ignored_on_sqlite"));
    let with_schema = scoped.expect("schema arg must not error");
    let without_schema = list_sql_views(&db, None).unwrap();
    assert_eq!(with_schema, without_schema);
}
25009
/// A backend that does not override `list_views` reports no views, with or
/// without a schema argument.
#[test]
fn list_sql_views_default_impl_returns_empty() {
    // Implements only the methods the stub is required to provide; all inert.
    struct NoIntrospection;
    impl super::SqlConnection for NoIntrospection {
        fn query(&self, _sql: &str, _params: &[Scalar]) -> Result<SqlQueryResult, IoError> {
            Ok(SqlQueryResult {
                columns: Vec::new(),
                rows: Vec::new(),
            })
        }
        fn execute_batch(&self, _statements: &str) -> Result<(), IoError> {
            Ok(())
        }
        fn table_exists(&self, _table: &str) -> Result<bool, IoError> {
            Ok(false)
        }
        fn insert_rows(&self, _stmt: &str, _batch: &[Vec<Scalar>]) -> Result<(), IoError> {
            Ok(())
        }
        fn dtype_sql(&self, _kind: DType) -> &'static str {
            "TEXT"
        }
        fn index_dtype_sql(&self, _idx: &Index) -> &'static str {
            "TEXT"
        }
    }

    let backend = NoIntrospection;
    let unscoped = list_sql_views(&backend, None).unwrap();
    assert!(unscoped.is_empty());
    let scoped = list_sql_views(&backend, Some("any")).unwrap();
    assert!(scoped.is_empty());
}
25040
/// Checks that the schema argument of `list_sql_views` reaches a backend
/// that overrides `list_views`.
#[test]
fn list_sql_views_routes_schema_to_backend_override() {
    // Stub backend with a distinct view list per schema name.
    struct MultiSchemaViews;
    impl super::SqlConnection for MultiSchemaViews {
        fn query(&self, _sql: &str, _params: &[Scalar]) -> Result<SqlQueryResult, IoError> {
            Ok(SqlQueryResult {
                columns: Vec::new(),
                rows: Vec::new(),
            })
        }
        fn execute_batch(&self, _statements: &str) -> Result<(), IoError> {
            Ok(())
        }
        fn table_exists(&self, _table: &str) -> Result<bool, IoError> {
            Ok(false)
        }
        fn insert_rows(&self, _stmt: &str, _batch: &[Vec<Scalar>]) -> Result<(), IoError> {
            Ok(())
        }
        fn dtype_sql(&self, _kind: DType) -> &'static str {
            "TEXT"
        }
        fn index_dtype_sql(&self, _idx: &Index) -> &'static str {
            "TEXT"
        }
        fn supports_schemas(&self) -> bool {
            true
        }
        fn list_views(&self, schema: Option<&str>) -> Result<Vec<String>, IoError> {
            let names: &[&str] = match schema {
                Some("reporting") => &["daily", "weekly"],
                Some("audit") => &["log_view"],
                _ => &[],
            };
            Ok(names.iter().map(|name| (*name).to_owned()).collect())
        }
    }

    let backend = MultiSchemaViews;

    let reporting = list_sql_views(&backend, Some("reporting")).unwrap();
    assert_eq!(reporting, vec!["daily", "weekly"]);

    let audit = list_sql_views(&backend, Some("audit")).unwrap();
    assert_eq!(audit, vec!["log_view"]);

    // No schema at all falls into the stub's catch-all arm.
    let unscoped = list_sql_views(&backend, None).unwrap();
    assert!(unscoped.is_empty());
}
25088
/// Asking for the unique constraints of a table that was never created
/// succeeds with an empty list.
#[cfg(feature = "sql-sqlite")]
#[test]
fn list_sql_unique_constraints_unknown_table_returns_empty() {
    let db = make_sql_test_conn();
    let constraints = list_sql_unique_constraints(&db, "no_such", None).unwrap();
    assert!(constraints.is_empty());
}
25099
/// A table declared without any UNIQUE clause reports no unique constraints.
#[cfg(feature = "sql-sqlite")]
#[test]
fn list_sql_unique_constraints_table_without_uq() {
    let db = make_sql_test_conn();
    let ddl = "CREATE TABLE plain (a INTEGER, b TEXT);";
    super::SqlConnection::execute_batch(&db, ddl).unwrap();
    let constraints = list_sql_unique_constraints(&db, "plain", None).unwrap();
    assert!(constraints.is_empty());
}
25109
/// A column-level `UNIQUE` is reported as a single constraint over that
/// column, and its name is expected to carry the `sqlite_autoindex_users_`
/// prefix.
#[cfg(feature = "sql-sqlite")]
#[test]
fn list_sql_unique_constraints_inline_unique() {
    let db = make_sql_test_conn();
    let ddl = "CREATE TABLE users (id INTEGER PRIMARY KEY, email TEXT UNIQUE);";
    super::SqlConnection::execute_batch(&db, ddl).unwrap();

    let constraints = list_sql_unique_constraints(&db, "users", None).unwrap();
    assert_eq!(constraints.len(), 1);

    let email_uq = &constraints[0];
    assert_eq!(email_uq.columns, vec!["email"]);
    assert!(
        email_uq.name.starts_with("sqlite_autoindex_users_"),
        "expected sqlite_autoindex_ name; got {}",
        email_uq.name
    );
}
25129
/// A table-level `UNIQUE (year, month, code)` is reported as one constraint
/// whose columns keep their declared order.
#[cfg(feature = "sql-sqlite")]
#[test]
fn list_sql_unique_constraints_composite_table_constraint() {
    let db = make_sql_test_conn();
    let ddl = "CREATE TABLE rolling ( \
               year INTEGER, month INTEGER, code TEXT, val REAL, \
               UNIQUE (year, month, code) \
               );";
    super::SqlConnection::execute_batch(&db, ddl).unwrap();

    let constraints = list_sql_unique_constraints(&db, "rolling", None).unwrap();
    assert_eq!(constraints.len(), 1);
    let composite = &constraints[0];
    assert_eq!(composite.columns, vec!["year", "month", "code"]);
}
25146
/// A `UNIQUE` table constraint and a `CREATE UNIQUE INDEX` on the same table
/// must show up in different listings: the former only under unique
/// constraints, the latter only under indexes.
#[cfg(feature = "sql-sqlite")]
#[test]
fn list_sql_unique_constraints_disjoint_from_create_unique_index() {
    let db = make_sql_test_conn();
    for ddl in [
        "CREATE TABLE mixed ( \
         a INTEGER, \
         b TEXT, \
         c TEXT, \
         UNIQUE (a) \
         );",
        "CREATE UNIQUE INDEX idx_mixed_b ON mixed (b);",
    ] {
        super::SqlConnection::execute_batch(&db, ddl).unwrap();
    }

    let constraints = list_sql_unique_constraints(&db, "mixed", None).unwrap();
    let indexes = list_sql_indexes(&db, "mixed", None).unwrap();

    // The table-level UNIQUE (a) is the only constraint.
    assert_eq!(constraints.len(), 1);
    assert_eq!(constraints[0].columns, vec!["a"]);

    // The explicitly created index is the only index.
    assert_eq!(indexes.len(), 1);
    let explicit = &indexes[0];
    assert_eq!(explicit.name, "idx_mixed_b");
    assert!(explicit.unique);
    assert_eq!(explicit.columns, vec!["b"]);

    // Neither listing contains the other's entry.
    assert!(!constraints.iter().any(|uq| uq.name == "idx_mixed_b"));
    assert!(
        !indexes
            .iter()
            .any(|idx| idx.name.starts_with("sqlite_autoindex_"))
    );
}
25183
/// A table name that is not a plain identifier is refused with an
/// `IoError::Sql` whose message mentions "invalid".
#[cfg(feature = "sql-sqlite")]
#[test]
fn list_sql_unique_constraints_rejects_invalid_table_name() {
    let db = make_sql_test_conn();
    let outcome = list_sql_unique_constraints(&db, "x; DROP TABLE users", None);
    let err = outcome.expect_err("must reject invalid identifier");
    let flagged_invalid = matches!(err, IoError::Sql(msg) if msg.contains("invalid"));
    assert!(flagged_invalid);
}
25192
/// A backend that does not override `list_unique_constraints` reports none.
#[test]
fn list_sql_unique_constraints_default_impl_returns_empty() {
    // Implements only the methods the stub is required to provide; all inert.
    struct NoIntrospection;
    impl super::SqlConnection for NoIntrospection {
        fn query(&self, _sql: &str, _params: &[Scalar]) -> Result<SqlQueryResult, IoError> {
            Ok(SqlQueryResult {
                columns: Vec::new(),
                rows: Vec::new(),
            })
        }
        fn execute_batch(&self, _statements: &str) -> Result<(), IoError> {
            Ok(())
        }
        fn table_exists(&self, _table: &str) -> Result<bool, IoError> {
            Ok(false)
        }
        fn insert_rows(&self, _stmt: &str, _batch: &[Vec<Scalar>]) -> Result<(), IoError> {
            Ok(())
        }
        fn dtype_sql(&self, _kind: DType) -> &'static str {
            "TEXT"
        }
        fn index_dtype_sql(&self, _idx: &Index) -> &'static str {
            "TEXT"
        }
    }

    let constraints = list_sql_unique_constraints(&NoIntrospection, "anything", None).unwrap();
    assert!(constraints.is_empty());
}
25225
/// Checks that `list_sql_unique_constraints` hands the table name and schema
/// to a backend that overrides `list_unique_constraints`.
#[test]
fn list_sql_unique_constraints_routes_to_backend_override() {
    // Stub backend: only ("users", Some("public")) reports a constraint.
    struct MultiSchemaUq;
    impl super::SqlConnection for MultiSchemaUq {
        fn query(&self, _sql: &str, _params: &[Scalar]) -> Result<SqlQueryResult, IoError> {
            Ok(SqlQueryResult {
                columns: Vec::new(),
                rows: Vec::new(),
            })
        }
        fn execute_batch(&self, _statements: &str) -> Result<(), IoError> {
            Ok(())
        }
        fn table_exists(&self, _table: &str) -> Result<bool, IoError> {
            Ok(false)
        }
        fn insert_rows(&self, _stmt: &str, _batch: &[Vec<Scalar>]) -> Result<(), IoError> {
            Ok(())
        }
        fn dtype_sql(&self, _kind: DType) -> &'static str {
            "TEXT"
        }
        fn index_dtype_sql(&self, _idx: &Index) -> &'static str {
            "TEXT"
        }
        fn supports_schemas(&self) -> bool {
            true
        }
        fn list_unique_constraints(
            &self,
            table: &str,
            schema: Option<&str>,
        ) -> Result<Vec<SqlUniqueConstraintSchema>, IoError> {
            match (table, schema) {
                ("users", Some("public")) => Ok(vec![SqlUniqueConstraintSchema {
                    name: String::from("users_email_key"),
                    columns: vec![String::from("email")],
                }]),
                _ => Ok(Vec::new()),
            }
        }
    }

    let backend = MultiSchemaUq;

    let public_uqs = list_sql_unique_constraints(&backend, "users", Some("public")).unwrap();
    assert_eq!(public_uqs.len(), 1);
    assert_eq!(public_uqs[0].name, "users_email_key");

    // Same table under another schema: the stub reports nothing.
    let audit_uqs = list_sql_unique_constraints(&backend, "users", Some("audit")).unwrap();
    assert!(audit_uqs.is_empty());
}
25279
/// On the SQLite test connection an existing table has no comment.
#[cfg(feature = "sql-sqlite")]
#[test]
fn sql_table_comment_returns_none_on_sqlite() {
    let db = make_sql_test_conn();
    let ddl = "CREATE TABLE t (x INTEGER);";
    super::SqlConnection::execute_batch(&db, ddl).unwrap();
    let described = sql_table_comment(&db, "t", None).unwrap();
    assert!(described.is_none());
}
25292
/// On the SQLite test connection a table that was never created also yields
/// `None` rather than an error.
#[cfg(feature = "sql-sqlite")]
#[test]
fn sql_table_comment_returns_none_on_sqlite_for_unknown_table() {
    let db = make_sql_test_conn();
    let described = sql_table_comment(&db, "no_such", None).unwrap();
    assert!(described.is_none());
}
25300
/// A backend that does not override `table_comment` reports no comment.
#[test]
fn sql_table_comment_default_impl_returns_none() {
    // Implements only the methods the stub is required to provide; all inert.
    struct NoIntrospection;
    impl super::SqlConnection for NoIntrospection {
        fn query(&self, _sql: &str, _params: &[Scalar]) -> Result<SqlQueryResult, IoError> {
            Ok(SqlQueryResult {
                columns: Vec::new(),
                rows: Vec::new(),
            })
        }
        fn execute_batch(&self, _statements: &str) -> Result<(), IoError> {
            Ok(())
        }
        fn table_exists(&self, _table: &str) -> Result<bool, IoError> {
            Ok(false)
        }
        fn insert_rows(&self, _stmt: &str, _batch: &[Vec<Scalar>]) -> Result<(), IoError> {
            Ok(())
        }
        fn dtype_sql(&self, _kind: DType) -> &'static str {
            "TEXT"
        }
        fn index_dtype_sql(&self, _idx: &Index) -> &'static str {
            "TEXT"
        }
    }

    let backend = NoIntrospection;
    let described = sql_table_comment(&backend, "anything", None).unwrap();
    assert!(described.is_none());
}
25334
/// Checks that `sql_table_comment` hands both the table name and the schema
/// to a backend that overrides `table_comment`.
#[test]
fn sql_table_comment_routes_to_backend_override() {
    // Stub backend: only ("users", Some("public")) carries a comment.
    struct PgLikeStub;
    impl super::SqlConnection for PgLikeStub {
        fn query(&self, _sql: &str, _params: &[Scalar]) -> Result<SqlQueryResult, IoError> {
            Ok(SqlQueryResult {
                columns: Vec::new(),
                rows: Vec::new(),
            })
        }
        fn execute_batch(&self, _statements: &str) -> Result<(), IoError> {
            Ok(())
        }
        fn table_exists(&self, _table: &str) -> Result<bool, IoError> {
            Ok(false)
        }
        fn insert_rows(&self, _stmt: &str, _batch: &[Vec<Scalar>]) -> Result<(), IoError> {
            Ok(())
        }
        fn dtype_sql(&self, _kind: DType) -> &'static str {
            "TEXT"
        }
        fn index_dtype_sql(&self, _idx: &Index) -> &'static str {
            "TEXT"
        }
        fn supports_schemas(&self) -> bool {
            true
        }
        fn table_comment(
            &self,
            table: &str,
            schema: Option<&str>,
        ) -> Result<Option<String>, IoError> {
            match (table, schema) {
                ("users", Some("public")) => Ok(Some(String::from("Customer accounts table"))),
                _ => Ok(None),
            }
        }
    }

    let backend = PgLikeStub;

    // Exact match on table and schema.
    let hit = sql_table_comment(&backend, "users", Some("public")).unwrap();
    assert_eq!(hit.as_deref(), Some("Customer accounts table"));

    // Right table, wrong schema.
    let wrong_schema = sql_table_comment(&backend, "users", Some("audit")).unwrap();
    assert!(wrong_schema.is_none());

    // Right schema, wrong table.
    let wrong_table = sql_table_comment(&backend, "missing", Some("public")).unwrap();
    assert!(wrong_table.is_none());
}
25393
/// An error returned by the backend's `table_comment` must come out of
/// `sql_table_comment` unchanged in kind and message.
#[test]
fn sql_table_comment_propagates_backend_error() {
    // Stub backend whose `table_comment` always fails.
    struct BrokenIntrospection;
    impl super::SqlConnection for BrokenIntrospection {
        fn query(&self, _sql: &str, _params: &[Scalar]) -> Result<SqlQueryResult, IoError> {
            Ok(SqlQueryResult {
                columns: Vec::new(),
                rows: Vec::new(),
            })
        }
        fn execute_batch(&self, _statements: &str) -> Result<(), IoError> {
            Ok(())
        }
        fn table_exists(&self, _table: &str) -> Result<bool, IoError> {
            Ok(false)
        }
        fn insert_rows(&self, _stmt: &str, _batch: &[Vec<Scalar>]) -> Result<(), IoError> {
            Ok(())
        }
        fn dtype_sql(&self, _kind: DType) -> &'static str {
            "TEXT"
        }
        fn index_dtype_sql(&self, _idx: &Index) -> &'static str {
            "TEXT"
        }
        fn table_comment(
            &self,
            _table: &str,
            _schema: Option<&str>,
        ) -> Result<Option<String>, IoError> {
            let reason = String::from("permission denied for pg_description");
            Err(IoError::Sql(reason))
        }
    }

    let backend = BrokenIntrospection;
    let outcome = sql_table_comment(&backend, "anything", None);
    let err = outcome.expect_err("backend error must surface");
    let surfaced = matches!(err, IoError::Sql(msg) if msg.contains("permission denied"));
    assert!(surfaced);
}
25434
/// `chunksize: Some(0)` is refused with an `IoError::Sql` whose message
/// mentions "chunksize".
#[cfg(feature = "sql-sqlite")]
#[test]
fn write_sql_chunksize_zero_rejected() {
    let db = make_sql_test_conn();
    let one_row =
        fp_frame::DataFrame::from_dict(&["x"], vec![("x", vec![Scalar::Int64(1)])]).unwrap();
    let opts = SqlWriteOptions {
        if_exists: SqlIfExists::Fail,
        index: false,
        index_label: None,
        schema: None,
        dtype: None,
        method: SqlInsertMethod::Single,
        chunksize: Some(0),
    };
    let err = write_sql_with_options(&one_row, &db, "t", &opts)
        .expect_err("chunksize=0 must be rejected");
    let mentions_chunksize = matches!(err, IoError::Sql(msg) if msg.contains("chunksize"));
    assert!(mentions_chunksize);
}
25460
/// With `chunksize: None`, writing a five-row frame leaves exactly five rows
/// in the target table.
#[cfg(feature = "sql-sqlite")]
#[test]
fn write_sql_chunksize_none_preserves_single_transaction_semantics() {
    let db = make_sql_test_conn();
    let ids: Vec<Scalar> = (1..=5).map(Scalar::Int64).collect();
    let five_rows = fp_frame::DataFrame::from_dict(&["id"], vec![("id", ids)]).unwrap();
    let opts = SqlWriteOptions {
        if_exists: SqlIfExists::Fail,
        index: false,
        index_label: None,
        schema: None,
        dtype: None,
        method: SqlInsertMethod::Single,
        chunksize: None,
    };
    write_sql_with_options(&five_rows, &db, "no_chunk", &opts).unwrap();

    let tally = super::SqlConnection::query(&db, "SELECT COUNT(*) FROM no_chunk", &[]).unwrap();
    assert_eq!(tally.rows[0][0], Scalar::Int64(5));
}
25500
/// With `chunksize: Some(2)` and five rows (not a multiple of the chunk
/// size), every row is still present when read back in id order.
#[cfg(feature = "sql-sqlite")]
#[test]
fn write_sql_single_chunksize_round_trips_all_rows() {
    let db = make_sql_test_conn();
    let source_ids: Vec<Scalar> = (1..=5).map(Scalar::Int64).collect();
    let five_rows = fp_frame::DataFrame::from_dict(&["id"], vec![("id", source_ids)]).unwrap();
    let opts = SqlWriteOptions {
        if_exists: SqlIfExists::Fail,
        index: false,
        index_label: None,
        schema: None,
        dtype: None,
        method: SqlInsertMethod::Single,
        chunksize: Some(2),
    };
    write_sql_with_options(&five_rows, &db, "chunked", &opts).unwrap();

    let fetched =
        super::SqlConnection::query(&db, "SELECT id FROM chunked ORDER BY id", &[]).unwrap();
    // Unwrap the first cell of each row; anything but Int64 is a test bug.
    let mut ids: Vec<i64> = Vec::with_capacity(fetched.rows.len());
    for row in &fetched.rows {
        match &row[0] {
            Scalar::Int64(v) => ids.push(*v),
            other => unreachable!("unexpected scalar: {other:?}"),
        }
    }
    assert_eq!(ids, vec![1, 2, 3, 4, 5]);
}
25549
/// With the `Single` method and `chunksize: Some(2)`, five rows reach the
/// backend as `insert_rows` calls of 2, 2 and 1 rows.
#[test]
fn write_sql_single_chunksize_dispatches_correct_chunk_counts() {
    // Records how many rows each `insert_rows` call received.
    struct Recorder {
        batch_sizes: std::cell::RefCell<Vec<usize>>,
    }
    impl super::SqlConnection for Recorder {
        fn query(&self, _sql: &str, _params: &[Scalar]) -> Result<SqlQueryResult, IoError> {
            Ok(SqlQueryResult {
                columns: Vec::new(),
                rows: Vec::new(),
            })
        }
        fn execute_batch(&self, _statements: &str) -> Result<(), IoError> {
            Ok(())
        }
        fn table_exists(&self, _table: &str) -> Result<bool, IoError> {
            Ok(false)
        }
        fn insert_rows(&self, _stmt: &str, batch: &[Vec<Scalar>]) -> Result<(), IoError> {
            let received = batch.len();
            self.batch_sizes.borrow_mut().push(received);
            Ok(())
        }
        fn dtype_sql(&self, _kind: DType) -> &'static str {
            "TEXT"
        }
        fn index_dtype_sql(&self, _idx: &Index) -> &'static str {
            "TEXT"
        }
    }

    let recorder = Recorder {
        batch_sizes: std::cell::RefCell::new(Vec::new()),
    };
    let xs: Vec<Scalar> = (1..=5).map(Scalar::Int64).collect();
    let five_rows = fp_frame::DataFrame::from_dict(&["x"], vec![("x", xs)]).unwrap();
    let opts = SqlWriteOptions {
        if_exists: SqlIfExists::Fail,
        index: false,
        index_label: None,
        schema: None,
        dtype: None,
        method: SqlInsertMethod::Single,
        chunksize: Some(2),
    };
    write_sql_with_options(&five_rows, &recorder, "chunked", &opts).unwrap();

    assert_eq!(recorder.batch_sizes.into_inner(), vec![2usize, 2, 1]);
}
25617
/// `Multi` method, two columns, `chunksize: Some(3)`, backend parameter cap
/// of 10: the first-row lengths seen by `insert_rows` are expected to be 6
/// and then 4.
#[test]
fn write_sql_multi_chunksize_takes_min_with_param_cap() {
    // Records the length of the first row handed to each `insert_rows` call
    // (0 when the batch is empty).
    // NOTE(review): presumably the Multi method flattens a chunk into one
    // parameter row, so this length is the bound-parameter count — confirm.
    struct ParamCapRecorder {
        first_row_lens: std::cell::RefCell<Vec<usize>>,
    }
    impl super::SqlConnection for ParamCapRecorder {
        fn query(&self, _sql: &str, _params: &[Scalar]) -> Result<SqlQueryResult, IoError> {
            Ok(SqlQueryResult {
                columns: Vec::new(),
                rows: Vec::new(),
            })
        }
        fn execute_batch(&self, _statements: &str) -> Result<(), IoError> {
            Ok(())
        }
        fn table_exists(&self, _table: &str) -> Result<bool, IoError> {
            Ok(false)
        }
        fn insert_rows(&self, _stmt: &str, batch: &[Vec<Scalar>]) -> Result<(), IoError> {
            let width = match batch.first() {
                Some(row) => row.len(),
                None => 0,
            };
            self.first_row_lens.borrow_mut().push(width);
            Ok(())
        }
        fn dtype_sql(&self, _kind: DType) -> &'static str {
            "TEXT"
        }
        fn index_dtype_sql(&self, _idx: &Index) -> &'static str {
            "TEXT"
        }
        fn max_param_count(&self) -> Option<usize> {
            Some(10)
        }
    }

    let recorder = ParamCapRecorder {
        first_row_lens: std::cell::RefCell::new(Vec::new()),
    };
    // a = 1..=5, b = 10, 20, ..., 50
    let col_a: Vec<Scalar> = (1..=5).map(Scalar::Int64).collect();
    let col_b: Vec<Scalar> = (1..=5).map(|k| Scalar::Int64(k * 10)).collect();
    let frame =
        fp_frame::DataFrame::from_dict(&["a", "b"], vec![("a", col_a), ("b", col_b)]).unwrap();
    let opts = SqlWriteOptions {
        if_exists: SqlIfExists::Fail,
        index: false,
        index_label: None,
        schema: None,
        dtype: None,
        method: SqlInsertMethod::Multi,
        chunksize: Some(3),
    };
    write_sql_with_options(&frame, &recorder, "chunked", &opts).unwrap();

    assert_eq!(recorder.first_row_lens.into_inner(), vec![6usize, 4]);
}
25706
/// `Multi` method, two columns, `chunksize: Some(10)`, backend parameter cap
/// of 4: the first-row lengths seen by `insert_rows` are expected to be
/// 4, 4 and 2.
#[test]
fn write_sql_multi_chunksize_param_cap_wins_when_smaller() {
    // Records the length of the first row handed to each `insert_rows` call
    // (0 when the batch is empty).
    // NOTE(review): presumably the Multi method flattens a chunk into one
    // parameter row, so this length is the bound-parameter count — confirm.
    struct TightCap {
        first_row_lens: std::cell::RefCell<Vec<usize>>,
    }
    impl super::SqlConnection for TightCap {
        fn query(&self, _sql: &str, _params: &[Scalar]) -> Result<SqlQueryResult, IoError> {
            Ok(SqlQueryResult {
                columns: Vec::new(),
                rows: Vec::new(),
            })
        }
        fn execute_batch(&self, _statements: &str) -> Result<(), IoError> {
            Ok(())
        }
        fn table_exists(&self, _table: &str) -> Result<bool, IoError> {
            Ok(false)
        }
        fn insert_rows(&self, _stmt: &str, batch: &[Vec<Scalar>]) -> Result<(), IoError> {
            let width = match batch.first() {
                Some(row) => row.len(),
                None => 0,
            };
            self.first_row_lens.borrow_mut().push(width);
            Ok(())
        }
        fn dtype_sql(&self, _kind: DType) -> &'static str {
            "TEXT"
        }
        fn index_dtype_sql(&self, _idx: &Index) -> &'static str {
            "TEXT"
        }
        fn max_param_count(&self) -> Option<usize> {
            Some(4)
        }
    }

    let recorder = TightCap {
        first_row_lens: std::cell::RefCell::new(Vec::new()),
    };
    // a = 1..=5, b = 10, 20, ..., 50
    let col_a: Vec<Scalar> = (1..=5).map(Scalar::Int64).collect();
    let col_b: Vec<Scalar> = (1..=5).map(|k| Scalar::Int64(k * 10)).collect();
    let frame =
        fp_frame::DataFrame::from_dict(&["a", "b"], vec![("a", col_a), ("b", col_b)]).unwrap();
    let opts = SqlWriteOptions {
        if_exists: SqlIfExists::Fail,
        index: false,
        index_label: None,
        schema: None,
        dtype: None,
        method: SqlInsertMethod::Multi,
        chunksize: Some(10),
    };
    write_sql_with_options(&frame, &recorder, "chunked", &opts).unwrap();

    assert_eq!(recorder.first_row_lens.into_inner(), vec![4usize, 4, 2]);
}
25791
/// `columns: None` reads every column of the table, in declared order.
#[cfg(feature = "sql-sqlite")]
#[test]
fn read_sql_table_with_options_columns_none_selects_all() {
    let db = make_sql_test_conn();
    for stmt in [
        "CREATE TABLE projection_default (a INTEGER, b TEXT, c REAL);",
        "INSERT INTO projection_default VALUES (1, 'x', 1.5);",
    ] {
        super::SqlConnection::execute_batch(&db, stmt).unwrap();
    }
    let opts = SqlReadOptions {
        columns: None,
        ..SqlReadOptions::default()
    };
    let loaded = read_sql_table_with_options(&db, "projection_default", &opts).unwrap();
    assert_eq!(loaded.column_names(), vec!["a", "b", "c"]);
}
25819
/// `columns: Some(["id", "name"])` yields only those two columns, and their
/// first-row values match what was inserted.
#[cfg(feature = "sql-sqlite")]
#[test]
fn read_sql_table_with_options_columns_projects_subset() {
    let db = make_sql_test_conn();
    for stmt in [
        "CREATE TABLE projection (id INTEGER, name TEXT, ts TEXT, value REAL);",
        "INSERT INTO projection VALUES (1, 'a', '2024-01-01', 1.5), \
         (2, 'b', '2024-01-02', 2.5);",
    ] {
        super::SqlConnection::execute_batch(&db, stmt).unwrap();
    }
    let opts = SqlReadOptions {
        columns: Some(vec![String::from("id"), String::from("name")]),
        ..SqlReadOptions::default()
    };
    let loaded = read_sql_table_with_options(&db, "projection", &opts).unwrap();

    assert_eq!(loaded.column_names(), vec!["id", "name"]);
    let first_id = &loaded.column("id").unwrap().values()[0];
    assert_eq!(*first_id, Scalar::Int64(1));
    let first_name = &loaded.column("name").unwrap().values()[0];
    assert_eq!(*first_name, Scalar::Utf8("a".into()));
}
25852
/// Requesting `["c", "a"]` returns the columns in that requested order, not
/// in the table's declared order.
#[cfg(feature = "sql-sqlite")]
#[test]
fn read_sql_table_with_options_columns_preserves_specified_order() {
    let db = make_sql_test_conn();
    for stmt in [
        "CREATE TABLE ordered_proj (a INT, b INT, c INT);",
        "INSERT INTO ordered_proj VALUES (1, 2, 3);",
    ] {
        super::SqlConnection::execute_batch(&db, stmt).unwrap();
    }
    let opts = SqlReadOptions {
        columns: Some(vec![String::from("c"), String::from("a")]),
        ..SqlReadOptions::default()
    };
    let loaded = read_sql_table_with_options(&db, "ordered_proj", &opts).unwrap();
    assert_eq!(loaded.column_names(), vec!["c", "a"]);
}
25877
/// `columns: Some(vec![])` is refused with an `IoError::Sql` whose message
/// contains "columns must be non-empty".
#[cfg(feature = "sql-sqlite")]
#[test]
fn read_sql_table_with_options_columns_empty_vec_rejected() {
    let db = make_sql_test_conn();
    super::SqlConnection::execute_batch(&db, "CREATE TABLE t (x INTEGER);").unwrap();
    let opts = SqlReadOptions {
        columns: Some(Vec::new()),
        ..SqlReadOptions::default()
    };
    let err = read_sql_table_with_options(&db, "t", &opts)
        .expect_err("empty columns must be rejected");
    let rejected =
        matches!(err, IoError::Sql(msg) if msg.contains("columns must be non-empty"));
    assert!(rejected);
}
25894
/// A requested column name that is not a plain identifier is refused with an
/// `IoError::Sql` whose message mentions "invalid".
#[cfg(feature = "sql-sqlite")]
#[test]
fn read_sql_table_with_options_columns_invalid_name_rejected() {
    let db = make_sql_test_conn();
    super::SqlConnection::execute_batch(&db, "CREATE TABLE t (x INTEGER);").unwrap();
    let opts = SqlReadOptions {
        columns: Some(vec![String::from("x; DROP TABLE t")]),
        ..SqlReadOptions::default()
    };
    let err = read_sql_table_with_options(&db, "t", &opts)
        .expect_err("invalid column name must be rejected");
    let flagged_invalid = matches!(err, IoError::Sql(msg) if msg.contains("invalid"));
    assert!(flagged_invalid);
}
25911
/// Column projection and `parse_dates` can be used together: the projected
/// `ts` column comes back as the text `2024-01-15 00:00:00`.
#[cfg(feature = "sql-sqlite")]
#[test]
fn read_sql_table_with_options_columns_combines_with_parse_dates() {
    let db = make_sql_test_conn();
    for stmt in [
        "CREATE TABLE events (id INT, ts TEXT, note TEXT);",
        "INSERT INTO events VALUES (1, '2024-01-15', 'launched');",
    ] {
        super::SqlConnection::execute_batch(&db, stmt).unwrap();
    }
    let opts = SqlReadOptions {
        columns: Some(vec![String::from("id"), String::from("ts")]),
        index_col: None,
        parse_dates: Some(vec![String::from("ts")]),
        ..SqlReadOptions::default()
    };
    let loaded = read_sql_table_with_options(&db, "events", &opts).unwrap();

    assert_eq!(loaded.column_names(), vec!["id", "ts"]);
    let first_ts = &loaded.column("ts").unwrap().values()[0];
    assert_eq!(*first_ts, Scalar::Utf8("2024-01-15 00:00:00".to_owned()));
}
25947
/// Chunked reads honour the column projection: three rows at chunk size 2
/// give two chunks, each exposing only the requested columns in the
/// requested order.
#[cfg(feature = "sql-sqlite")]
#[test]
fn read_sql_table_chunks_with_options_columns_projects_before_chunking() {
    let db = make_sql_test_conn();
    for stmt in [
        "CREATE TABLE chunk_projection (id INTEGER, name TEXT, hidden REAL);",
        "INSERT INTO chunk_projection VALUES \
         (1, 'a', 10.0), \
         (2, 'b', 20.0), \
         (3, 'c', 30.0);",
    ] {
        super::SqlConnection::execute_batch(&db, stmt).unwrap();
    }

    let opts = SqlReadOptions {
        columns: Some(vec![String::from("name"), String::from("id")]),
        ..SqlReadOptions::default()
    };
    let chunk_iter =
        read_sql_table_chunks_with_options(&db, "chunk_projection", &opts, 2).unwrap();
    let chunks: Vec<DataFrame> = chunk_iter.collect::<Result<Vec<_>, _>>().unwrap();

    assert_eq!(chunks.len(), 2);
    let (head, tail) = (&chunks[0], &chunks[1]);
    assert_eq!(head.column_names(), vec!["name", "id"]);
    assert_eq!(tail.column_names(), vec!["name", "id"]);
    assert_eq!(
        head.column("name").unwrap().values(),
        &[Scalar::Utf8("a".to_owned()), Scalar::Utf8("b".to_owned())]
    );
    assert_eq!(tail.column("id").unwrap().values(), &[Scalar::Int64(3)]);
    // The unrequested column must not be present.
    assert!(head.column("hidden").is_none());
}
25992
/// With a schema-aware backend, a chunked read with `schema` and `columns`
/// issues exactly one schema-qualified, projected SELECT and then splits the
/// three returned rows into chunks of 2 and 1.
#[test]
fn read_sql_table_chunks_with_options_schema_projects_before_chunking() {
    // Stub backend that logs every query text and always answers with the
    // same three (name, id) rows.
    struct MultiSchemaProjectedChunks {
        seen_sql: std::cell::RefCell<Vec<String>>,
    }

    impl super::SqlConnection for MultiSchemaProjectedChunks {
        fn query(&self, sql: &str, _params: &[Scalar]) -> Result<SqlQueryResult, IoError> {
            self.seen_sql.borrow_mut().push(String::from(sql));
            let rows = vec![
                vec![Scalar::Utf8(String::from("a")), Scalar::Int64(1)],
                vec![Scalar::Utf8(String::from("b")), Scalar::Int64(2)],
                vec![Scalar::Utf8(String::from("c")), Scalar::Int64(3)],
            ];
            Ok(SqlQueryResult {
                columns: vec![String::from("name"), String::from("id")],
                rows,
            })
        }

        fn execute_batch(&self, _statements: &str) -> Result<(), IoError> {
            Ok(())
        }

        fn table_exists(&self, _table: &str) -> Result<bool, IoError> {
            Ok(false)
        }

        fn insert_rows(&self, _stmt: &str, _batch: &[Vec<Scalar>]) -> Result<(), IoError> {
            Ok(())
        }

        fn dtype_sql(&self, _kind: DType) -> &'static str {
            "TEXT"
        }

        fn index_dtype_sql(&self, _idx: &Index) -> &'static str {
            "TEXT"
        }

        fn supports_schemas(&self) -> bool {
            true
        }
    }

    let backend = MultiSchemaProjectedChunks {
        seen_sql: std::cell::RefCell::new(Vec::new()),
    };

    let opts = SqlReadOptions {
        schema: Some(String::from("analytics")),
        columns: Some(vec![String::from("name"), String::from("id")]),
        ..SqlReadOptions::default()
    };
    let chunk_iter =
        super::read_sql_table_chunks_with_options(&backend, "events", &opts, 2).unwrap();
    let chunks: Vec<DataFrame> = chunk_iter.collect::<Result<Vec<_>, _>>().unwrap();

    // Exactly one query, quoted and schema-qualified.
    assert_eq!(
        backend.seen_sql.borrow().as_slice(),
        &[String::from(
            r#"SELECT "name", "id" FROM "analytics"."events""#
        )]
    );

    assert_eq!(chunks.len(), 2);
    let (head, tail) = (&chunks[0], &chunks[1]);
    assert_eq!(head.column_names(), vec!["name", "id"]);
    assert_eq!(tail.column_names(), vec!["name", "id"]);
    // Each chunk's index labels start again from 0.
    assert_eq!(
        head.index().labels(),
        &[IndexLabel::Int64(0), IndexLabel::Int64(1)]
    );
    assert_eq!(tail.index().labels(), &[IndexLabel::Int64(0)]);
    assert_eq!(
        head.column("name").unwrap().values(),
        &[Scalar::Utf8("a".to_owned()), Scalar::Utf8("b".to_owned())]
    );
    assert_eq!(tail.column("id").unwrap().values(), &[Scalar::Int64(3)]);
}
26078
/// On the SQLite test connection, every column returned by
/// `sql_table_schema` has `comment == None`.
#[cfg(feature = "sql-sqlite")]
#[test]
fn sql_table_schema_comment_is_none_on_sqlite() {
    let db = make_sql_test_conn();
    let ddl = "CREATE TABLE labeled (id INTEGER, name TEXT);";
    super::SqlConnection::execute_batch(&db, ddl).unwrap();

    // Outer unwrap: the call succeeded; inner unwrap: the table was found.
    let described = sql_table_schema(&db, "labeled", None).unwrap().unwrap();
    described.columns.iter().for_each(|col| {
        assert!(
            col.comment.is_none(),
            "SQLite should report no column comment; got {:?} on {}",
            col.comment,
            col.name
        );
    });
}
26099
// Checks that column comments supplied by a backend's `table_schema`
// override are what `sql_table_schema` returns to the caller.
#[test]
fn sql_table_schema_comment_routes_to_backend_override() {
    // Stub backend: the mandatory trait methods are inert; only
    // `table_schema` yields data, and only for the table named "users".
    struct PgLikeWithComments;

    impl super::SqlConnection for PgLikeWithComments {
        fn table_schema(
            &self,
            table: &str,
            _schema: Option<&str>,
        ) -> Result<Option<SqlTableSchema>, IoError> {
            if table != "users" {
                return Ok(None);
            }

            // The two non-key TEXT columns differ only in name, nullability
            // and comment, so build them through one closure.
            let text_column = |name: &str, nullable: bool, comment: Option<&str>| {
                SqlColumnSchema {
                    name: name.to_owned(),
                    declared_type: Some("TEXT".to_owned()),
                    nullable,
                    default_value: None,
                    primary_key_ordinal: None,
                    comment: comment.map(str::to_owned),
                    autoincrement: false,
                }
            };
            let id_column = SqlColumnSchema {
                name: "id".to_owned(),
                declared_type: Some("BIGINT".to_owned()),
                nullable: false,
                default_value: None,
                primary_key_ordinal: Some(0),
                comment: Some("Surrogate primary key".to_owned()),
                autoincrement: false,
            };

            Ok(Some(SqlTableSchema {
                table_name: "users".to_owned(),
                columns: vec![
                    id_column,
                    text_column("email", false, Some("Login identifier")),
                    text_column("name", true, None),
                ],
            }))
        }

        fn supports_schemas(&self) -> bool {
            true
        }

        fn query(&self, _sql: &str, _params: &[Scalar]) -> Result<SqlQueryResult, IoError> {
            Ok(SqlQueryResult {
                columns: Vec::new(),
                rows: Vec::new(),
            })
        }

        fn execute_batch(&self, _sql: &str) -> Result<(), IoError> {
            Ok(())
        }

        fn insert_rows(&self, _sql: &str, _rows: &[Vec<Scalar>]) -> Result<(), IoError> {
            Ok(())
        }

        fn table_exists(&self, _name: &str) -> Result<bool, IoError> {
            Ok(false)
        }

        fn dtype_sql(&self, _dtype: DType) -> &'static str {
            "TEXT"
        }

        fn index_dtype_sql(&self, _index: &Index) -> &'static str {
            "TEXT"
        }
    }

    let conn = PgLikeWithComments;
    let schema = sql_table_schema(&conn, "users", None).unwrap().unwrap();

    // Expected comment per column; `None` means the column carries no comment.
    for (column, expected_comment) in [
        ("id", Some("Surrogate primary key")),
        ("email", Some("Login identifier")),
        ("name", None),
    ] {
        assert_eq!(
            schema.column(column).unwrap().comment.as_deref(),
            expected_comment
        );
    }
}
26182
// With `index_col: None` no column is promoted to the index: both selected
// columns stay in the frame and the index is the default 0-based range.
#[cfg(feature = "sql-sqlite")]
#[test]
fn read_sql_with_options_index_col_none_keeps_range_index() {
    let conn = make_sql_test_conn();
    super::SqlConnection::execute_batch(&conn, "CREATE TABLE keyed (id INTEGER, val INTEGER);")
        .unwrap();
    super::SqlConnection::execute_batch(&conn, "INSERT INTO keyed VALUES (1, 10), (2, 20);")
        .unwrap();
    let frame = read_sql_with_options(
        &conn,
        "SELECT id, val FROM keyed ORDER BY id",
        &SqlReadOptions {
            index_col: None,
            ..Default::default()
        },
    )
    .unwrap();
    assert_eq!(frame.index().len(), 2);
    // Pin the labels themselves: a length check alone cannot distinguish the
    // default range index from any other two-row index.
    assert_eq!(
        frame.index().labels(),
        &[IndexLabel::Int64(0), IndexLabel::Int64(1)]
    );
    assert_eq!(frame.column_names(), vec!["id", "val"]);
}
26206
// `index_col: Some("id")` moves the `id` column out of the data columns and
// uses its values as the index labels.
#[cfg(feature = "sql-sqlite")]
#[test]
fn read_sql_with_options_index_col_promotes_named_column() {
    let conn = make_sql_test_conn();
    super::SqlConnection::execute_batch(&conn, "CREATE TABLE keyed (id INTEGER, val INTEGER);")
        .unwrap();
    super::SqlConnection::execute_batch(&conn, "INSERT INTO keyed VALUES (10, 1), (20, 2);")
        .unwrap();
    let frame = read_sql_with_options(
        &conn,
        "SELECT id, val FROM keyed ORDER BY id",
        &SqlReadOptions {
            index_col: Some("id".to_owned()),
            ..Default::default()
        },
    )
    .unwrap();
    assert_eq!(frame.column_names(), vec!["val"]);
    // Compare the labels directly instead of filtering for `Int64` first:
    // filtering would silently discard labels of any other variant and let a
    // wrongly-typed index slip through.
    assert_eq!(
        frame.index().labels(),
        &[IndexLabel::Int64(10), IndexLabel::Int64(20)]
    );
}
26239
// Naming an `index_col` that the query result does not contain must fail
// with an `IoError::Sql` whose message mentions "not present".
#[cfg(feature = "sql-sqlite")]
#[test]
fn read_sql_with_options_index_col_missing_column_errors() {
    let conn = make_sql_test_conn();
    super::SqlConnection::execute_batch(&conn, "CREATE TABLE t (a INTEGER);").unwrap();
    super::SqlConnection::execute_batch(&conn, "INSERT INTO t VALUES (1);").unwrap();
    let err = read_sql_with_options(
        &conn,
        "SELECT a FROM t",
        &SqlReadOptions {
            index_col: Some("nonexistent".to_owned()),
            ..Default::default()
        },
    )
    .expect_err("missing index_col must error");
    // `ref msg` keeps `err` intact so the failure message can show what was
    // actually returned.
    assert!(
        matches!(err, IoError::Sql(ref msg) if msg.contains("not present")),
        "expected missing index_col error, got {err:?}"
    );
}
26257
// An empty-string `index_col` must be rejected with an `IoError::Sql` whose
// message mentions "empty string".
#[cfg(feature = "sql-sqlite")]
#[test]
fn read_sql_with_options_index_col_empty_string_rejected() {
    let conn = make_sql_test_conn();
    super::SqlConnection::execute_batch(&conn, "CREATE TABLE t (a INTEGER);").unwrap();
    let err = read_sql_with_options(
        &conn,
        "SELECT a FROM t",
        &SqlReadOptions {
            index_col: Some(String::new()),
            ..Default::default()
        },
    )
    .expect_err("empty index_col must be rejected");
    // `ref msg` keeps `err` intact so the failure message can show what was
    // actually returned.
    assert!(
        matches!(err, IoError::Sql(ref msg) if msg.contains("empty string")),
        "expected empty index_col error, got {err:?}"
    );
}
26274
// Every read entry point that takes an explicit `index_col` argument must
// reject `Some("")` with an `IoError::Sql` mentioning "empty string".
#[cfg(feature = "sql-sqlite")]
#[test]
fn read_sql_explicit_index_col_empty_string_rejected_across_entrypoints() {
    // Unwraps the error from `outcome` (panicking with `context` if the call
    // succeeded) and checks that it is the empty-index_col rejection.
    fn expect_empty_index_col_error<T: std::fmt::Debug>(
        outcome: Result<T, IoError>,
        context: &str,
    ) {
        let err = outcome.expect_err(context);
        assert!(
            matches!(err, IoError::Sql(ref msg) if msg.contains("empty string")),
            "expected empty index_col error, got {err:?}"
        );
    }

    let conn = make_sql_test_conn();
    super::SqlConnection::execute_batch(
        &conn,
        "CREATE TABLE explicit_idx (a INTEGER, b TEXT);
         INSERT INTO explicit_idx VALUES (1, 'x'), (2, 'y');",
    )
    .unwrap();

    let query = "SELECT a, b FROM explicit_idx";
    let table = "explicit_idx";
    let defaults = SqlReadOptions::default();

    // Query-based entry points.
    expect_empty_index_col_error(
        read_sql_with_index_col(&conn, query, Some("")),
        "empty explicit read_sql index_col must be rejected",
    );
    expect_empty_index_col_error(
        read_sql_query_with_options_and_index_col(&conn, query, &defaults, Some("")),
        "empty explicit read_sql_query index_col must be rejected",
    );
    expect_empty_index_col_error(
        read_sql_query_chunks_with_options_and_index_col(&conn, query, &defaults, Some(""), 1),
        "empty explicit query chunk index_col must be rejected",
    );

    // Table-based entry points.
    expect_empty_index_col_error(
        read_sql_table_with_index_col(&conn, table, Some("")),
        "empty explicit table index_col must be rejected",
    );
    expect_empty_index_col_error(
        read_sql_table_with_options_and_index_col(&conn, table, &defaults, Some("")),
        "empty explicit table options index_col must be rejected",
    );
    expect_empty_index_col_error(
        read_sql_table_columns_with_index_col(&conn, table, &["a"], Some("")),
        "empty explicit table-columns index_col must be rejected",
    );
    expect_empty_index_col_error(
        read_sql_table_columns_chunks_with_index_col(&conn, table, &["a"], Some(""), 1),
        "empty explicit table-columns chunk index_col must be rejected",
    );
}
26344
// `columns` and `index_col` may be combined: the projection selects `id` and
// `val`, then `id` is promoted to the index and only `val` remains as data.
#[cfg(feature = "sql-sqlite")]
#[test]
fn read_sql_table_with_options_index_col_combines_with_columns_projection() {
    let conn = make_sql_test_conn();
    super::SqlConnection::execute_batch(
        &conn,
        "CREATE TABLE wide (id INTEGER, val INTEGER, ts TEXT, note TEXT);",
    )
    .unwrap();
    super::SqlConnection::execute_batch(
        &conn,
        "INSERT INTO wide VALUES (5, 100, 't1', 'first');",
    )
    .unwrap();
    let frame = read_sql_table_with_options(
        &conn,
        "wide",
        &SqlReadOptions {
            columns: Some(vec!["id".to_owned(), "val".to_owned()]),
            index_col: Some("id".to_owned()),
            ..Default::default()
        },
    )
    .unwrap();
    assert_eq!(frame.column_names(), vec!["val"]);
    // Compare the labels directly instead of filtering for `Int64` first:
    // filtering would silently discard labels of any other variant, so extra
    // or wrongly-typed labels could go unnoticed.
    assert_eq!(frame.index().labels(), &[IndexLabel::Int64(5)]);
}
26383
// When `index_col` names a column that is not listed in `columns`, the read
// still yields that column as the index while exposing only the requested
// data columns.
#[cfg(feature = "sql-sqlite")]
#[test]
fn read_sql_table_with_options_columns_auto_project_index_col() {
    let conn = make_sql_test_conn();
    for statement in [
        "CREATE TABLE projected_index (id INTEGER, val TEXT, hidden TEXT);",
        "INSERT INTO projected_index VALUES (10, 'a', 'x'), (20, 'b', 'y');",
    ] {
        super::SqlConnection::execute_batch(&conn, statement).unwrap();
    }

    let options = SqlReadOptions {
        columns: Some(vec![String::from("val")]),
        index_col: Some(String::from("id")),
        ..Default::default()
    };
    let frame = read_sql_table_with_options(&conn, "projected_index", &options).unwrap();

    // Only the requested data column is exposed; neither the index column
    // nor the unrequested one appears as data.
    assert_eq!(frame.column_names(), vec!["val"]);
    for absent in ["id", "hidden"] {
        assert!(frame.column(absent).is_none());
    }

    assert_eq!(
        frame.index().labels(),
        &[IndexLabel::Int64(10), IndexLabel::Int64(20)]
    );
    let val_column = frame.column("val").unwrap();
    assert_eq!(
        val_column.values(),
        &[Scalar::Utf8("a".into()), Scalar::Utf8("b".into())]
    );
}
26422
// Chunked variant of the auto-projection check: three rows read with a chunk
// size of 2 give two frames, each indexed by `id` and exposing only `val`.
#[cfg(feature = "sql-sqlite")]
#[test]
fn read_sql_table_chunks_with_options_columns_auto_project_index_col() {
    let conn = make_sql_test_conn();
    for statement in [
        "CREATE TABLE projected_index_chunks (id INTEGER, val TEXT, hidden TEXT);",
        "INSERT INTO projected_index_chunks VALUES (10, 'a', 'x'), (20, 'b', 'y'), (30, 'c', 'z');",
    ] {
        super::SqlConnection::execute_batch(&conn, statement).unwrap();
    }

    let options = SqlReadOptions {
        columns: Some(vec![String::from("val")]),
        ..Default::default()
    };
    let reader = read_sql_table_chunks_with_options_and_index_col(
        &conn,
        "projected_index_chunks",
        &options,
        Some("id"),
        2,
    )
    .unwrap();
    let chunks: Vec<DataFrame> = reader.collect::<Result<Vec<_>, _>>().unwrap();

    assert_eq!(chunks.len(), 2);
    let (first, second) = (&chunks[0], &chunks[1]);

    // First chunk: two rows, only `val` visible as data.
    assert_eq!(first.column_names(), vec!["val"]);
    for absent in ["id", "hidden"] {
        assert!(first.column(absent).is_none());
    }
    assert_eq!(
        first.index().labels(),
        &[IndexLabel::Int64(10), IndexLabel::Int64(20)]
    );

    // Second chunk: the single remaining row.
    assert_eq!(second.index().labels(), &[IndexLabel::Int64(30)]);
    assert_eq!(
        second.column("val").unwrap().values(),
        &[Scalar::Utf8("c".into())]
    );
}
26469
// When both `SqlReadOptions::index_col` ("a") and the explicit argument ("b")
// are given, the explicit argument decides: `b` becomes the index and `a`
// stays a data column.
#[cfg(feature = "sql-sqlite")]
#[test]
fn read_sql_table_with_options_and_index_col_explicit_arg_wins_over_options() {
    let conn = make_sql_test_conn();
    super::SqlConnection::execute_batch(
        &conn,
        "CREATE TABLE both (a INTEGER, b INTEGER, c TEXT);",
    )
    .unwrap();
    super::SqlConnection::execute_batch(
        &conn,
        "INSERT INTO both VALUES (1, 100, 'x'), (2, 200, 'y');",
    )
    .unwrap();
    let frame = read_sql_table_with_options_and_index_col(
        &conn,
        "both",
        &SqlReadOptions {
            index_col: Some("a".to_owned()),
            ..Default::default()
        },
        Some("b"),
    )
    .unwrap();
    assert_eq!(frame.column_names(), vec!["a", "c"]);
    // Compare the labels directly instead of filtering for `Int64` first:
    // filtering would silently discard labels of any other variant, so extra
    // or wrongly-typed labels could go unnoticed.
    assert_eq!(
        frame.index().labels(),
        &[IndexLabel::Int64(100), IndexLabel::Int64(200)]
    );
}
26510
// A lone `INTEGER PRIMARY KEY` column is expected to be reported as
// autoincrement, while the other column of the table is not.
#[cfg(feature = "sql-sqlite")]
#[test]
fn sql_table_schema_autoincrement_detected_on_integer_primary_key() {
    let conn = make_sql_test_conn();
    let ddl = "CREATE TABLE auto_a (id INTEGER PRIMARY KEY, name TEXT);";
    super::SqlConnection::execute_batch(&conn, ddl).unwrap();

    let reflected = sql_table_schema(&conn, "auto_a", None).unwrap().unwrap();

    let id = reflected.column("id").unwrap();
    assert!(
        id.autoincrement,
        "INTEGER PRIMARY KEY must be autoincrement; got {id:?}"
    );

    let is_name_autoincrement = reflected.column("name").unwrap().autoincrement;
    assert!(
        !is_name_autoincrement,
        "non-PK column must not be autoincrement"
    );
}
26536
// A column declared `INTEGER PRIMARY KEY AUTOINCREMENT` is reported as
// autoincrement.
#[cfg(feature = "sql-sqlite")]
#[test]
fn sql_table_schema_autoincrement_detected_with_explicit_keyword() {
    let conn = make_sql_test_conn();
    let ddl = "CREATE TABLE auto_b (id INTEGER PRIMARY KEY AUTOINCREMENT, val TEXT);";
    super::SqlConnection::execute_batch(&conn, ddl).unwrap();

    let reflected = sql_table_schema(&conn, "auto_b", None).unwrap().unwrap();
    let is_id_autoincrement = reflected.column("id").unwrap().autoincrement;
    assert!(is_id_autoincrement);
}
26552
// A `TEXT PRIMARY KEY` column must not be flagged as autoincrement.
#[cfg(feature = "sql-sqlite")]
#[test]
fn sql_table_schema_autoincrement_not_set_for_text_primary_key() {
    let conn = make_sql_test_conn();
    let ddl = "CREATE TABLE text_pk (code TEXT PRIMARY KEY, name TEXT);";
    super::SqlConnection::execute_batch(&conn, ddl).unwrap();

    let reflected = sql_table_schema(&conn, "text_pk", None).unwrap().unwrap();
    let code = reflected.column("code").unwrap();
    assert!(
        !code.autoincrement,
        "TEXT PRIMARY KEY is not autoincrement; got {code:?}"
    );
}
26570
// An `INTEGER` column that is not part of any primary key must not be
// flagged as autoincrement.
#[cfg(feature = "sql-sqlite")]
#[test]
fn sql_table_schema_autoincrement_not_set_for_non_pk_integer() {
    let conn = make_sql_test_conn();
    let ddl = "CREATE TABLE plain_int (val INTEGER, name TEXT);";
    super::SqlConnection::execute_batch(&conn, ddl).unwrap();

    let reflected = sql_table_schema(&conn, "plain_int", None).unwrap().unwrap();
    let is_val_autoincrement = reflected.column("val").unwrap().autoincrement;
    assert!(!is_val_autoincrement, "non-PK INTEGER is not autoincrement");
}
26584
// Columns of a composite primary key keep their key ordinals (0, 1, 2) but
// none of them is flagged as autoincrement, even the INTEGER ones.
#[cfg(feature = "sql-sqlite")]
#[test]
fn sql_table_schema_autoincrement_not_set_for_composite_pk_integer() {
    let conn = make_sql_test_conn();
    super::SqlConnection::execute_batch(
        &conn,
        "CREATE TABLE composite_pk ( \
         year INTEGER NOT NULL, \
         month INTEGER NOT NULL, \
         code TEXT NOT NULL, \
         PRIMARY KEY (year, month, code) \
         );",
    )
    .unwrap();
    let reflected = sql_table_schema(&conn, "composite_pk", None)
        .unwrap()
        .unwrap();

    // Key ordinals follow the order of the PRIMARY KEY clause.
    for (column, ordinal) in [("year", 0), ("month", 1), ("code", 2)] {
        assert_eq!(
            reflected.column(column).unwrap().primary_key_ordinal,
            Some(ordinal)
        );
    }

    assert!(
        !reflected.column("year").unwrap().autoincrement,
        "composite PK first col must not be autoincrement"
    );
    for column in ["month", "code"] {
        assert!(!reflected.column(column).unwrap().autoincrement);
    }
}
26621
// Side-by-side check on one connection: a single-column INTEGER primary key
// is autoincrement, the INTEGER members of a two-column key are not.
#[cfg(feature = "sql-sqlite")]
#[test]
fn sql_table_schema_autoincrement_two_pass_count_distinguishes_single_vs_composite() {
    let conn = make_sql_test_conn();
    for ddl in [
        "CREATE TABLE single_int_pk (id INTEGER PRIMARY KEY, label TEXT);",
        "CREATE TABLE composite_int_pk ( \
         a INTEGER NOT NULL, \
         b INTEGER NOT NULL, \
         PRIMARY KEY (a, b) \
         );",
    ] {
        super::SqlConnection::execute_batch(&conn, ddl).unwrap();
    }

    let single_pk = sql_table_schema(&conn, "single_int_pk", None)
        .unwrap()
        .unwrap();
    let is_id_autoincrement = single_pk.column("id").unwrap().autoincrement;
    assert!(is_id_autoincrement);

    let composite_pk = sql_table_schema(&conn, "composite_int_pk", None)
        .unwrap()
        .unwrap();
    for member in ["a", "b"] {
        assert!(!composite_pk.column(member).unwrap().autoincrement);
    }
}
26655
// Checks that the `autoincrement` flags supplied by a backend's
// `table_schema` override are what `sql_table_schema` returns.
#[test]
fn sql_table_schema_autoincrement_routes_to_backend_override() {
    // Stub backend: the mandatory trait methods are inert; only
    // `table_schema` yields data, and only for the table named "users".
    struct PgLikeAutoinc;

    impl super::SqlConnection for PgLikeAutoinc {
        fn table_schema(
            &self,
            table: &str,
            _schema: Option<&str>,
        ) -> Result<Option<SqlTableSchema>, IoError> {
            match table {
                "users" => {
                    let id_column = SqlColumnSchema {
                        name: String::from("id"),
                        declared_type: Some(String::from("BIGSERIAL")),
                        nullable: false,
                        default_value: None,
                        primary_key_ordinal: Some(0),
                        comment: None,
                        autoincrement: true,
                    };
                    let email_column = SqlColumnSchema {
                        name: String::from("email"),
                        declared_type: Some(String::from("TEXT")),
                        nullable: false,
                        default_value: None,
                        primary_key_ordinal: None,
                        comment: None,
                        autoincrement: false,
                    };
                    Ok(Some(SqlTableSchema {
                        table_name: String::from("users"),
                        columns: vec![id_column, email_column],
                    }))
                }
                _ => Ok(None),
            }
        }

        fn supports_schemas(&self) -> bool {
            true
        }

        fn query(&self, _sql: &str, _params: &[Scalar]) -> Result<SqlQueryResult, IoError> {
            Ok(SqlQueryResult {
                columns: Vec::new(),
                rows: Vec::new(),
            })
        }

        fn execute_batch(&self, _sql: &str) -> Result<(), IoError> {
            Ok(())
        }

        fn insert_rows(&self, _sql: &str, _rows: &[Vec<Scalar>]) -> Result<(), IoError> {
            Ok(())
        }

        fn table_exists(&self, _name: &str) -> Result<bool, IoError> {
            Ok(false)
        }

        fn dtype_sql(&self, _dtype: DType) -> &'static str {
            "TEXT"
        }

        fn index_dtype_sql(&self, _index: &Index) -> &'static str {
            "TEXT"
        }
    }

    let conn = PgLikeAutoinc;
    let schema = sql_table_schema(&conn, "users", None).unwrap().unwrap();

    let is_id_autoincrement = schema.column("id").unwrap().autoincrement;
    let is_email_autoincrement = schema.column("email").unwrap().autoincrement;
    assert!(is_id_autoincrement);
    assert!(!is_email_autoincrement);
}
26725
// The inspector lists tables and views separately and reports no schemas on
// the SQLite test connection.
#[cfg(feature = "sql-sqlite")]
#[test]
fn sql_inspector_tables_views_schemas() {
    let conn = make_sql_test_conn();
    for ddl in [
        "CREATE TABLE t1 (x INTEGER);",
        "CREATE TABLE t2 (y TEXT);",
        "CREATE VIEW v1 AS SELECT x FROM t1;",
    ] {
        super::SqlConnection::execute_batch(&conn, ddl).unwrap();
    }

    let inspector = SqlInspector::new(&conn);

    let tables = inspector.tables(None).unwrap();
    assert_eq!(tables, vec!["t1", "t2"]);

    let views = inspector.views(None).unwrap();
    assert_eq!(views, vec!["v1"]);

    let schemas = inspector.schemas().unwrap();
    assert!(schemas.is_empty());
}
26742
// Exercises the per-table inspector queries (columns, primary key, indexes,
// foreign keys) against a child table that references a parent table.
#[cfg(feature = "sql-sqlite")]
#[test]
fn sql_inspector_columns_pk_indexes_fks() {
    let conn = make_sql_test_conn();
    for ddl in [
        "CREATE TABLE parent (pid INTEGER PRIMARY KEY);",
        "CREATE TABLE child ( \
         cid INTEGER PRIMARY KEY, \
         parent_id INTEGER, \
         tag TEXT, \
         FOREIGN KEY (parent_id) REFERENCES parent(pid) \
         );",
        "CREATE INDEX idx_child_tag ON child(tag);",
    ] {
        super::SqlConnection::execute_batch(&conn, ddl).unwrap();
    }

    let inspector = SqlInspector::new(&conn);

    // Columns are reported in declaration order.
    let reflected = inspector.columns("child", None).unwrap().unwrap();
    let column_names: Vec<&str> = reflected
        .columns
        .iter()
        .map(|column| column.name.as_str())
        .collect();
    assert_eq!(column_names, vec!["cid", "parent_id", "tag"]);

    let primary_key = inspector.primary_key_columns("child", None).unwrap();
    assert_eq!(primary_key, vec!["cid"]);

    // Only the explicitly created index is expected.
    let indexes = inspector.indexes("child", None).unwrap();
    assert_eq!(indexes.len(), 1);
    let only_index = &indexes[0];
    assert_eq!(only_index.name, "idx_child_tag");

    let foreign_keys = inspector.foreign_keys("child", None).unwrap();
    assert_eq!(foreign_keys.len(), 1);
    let only_fk = &foreign_keys[0];
    assert_eq!(only_fk.columns, vec!["parent_id"]);
    assert_eq!(only_fk.referenced_table, "parent");
    assert_eq!(only_fk.referenced_columns, vec!["pid"]);
}
26788
// A column-level `UNIQUE` shows up as one unique constraint, and
// `table_exists` distinguishes a created table from an unknown name.
#[cfg(feature = "sql-sqlite")]
#[test]
fn sql_inspector_unique_constraints_and_table_exists() {
    let conn = make_sql_test_conn();
    let ddl = "CREATE TABLE users (id INTEGER PRIMARY KEY, email TEXT UNIQUE);";
    super::SqlConnection::execute_batch(&conn, ddl).unwrap();

    let inspector = SqlInspector::new(&conn);

    let constraints = inspector.unique_constraints("users", None).unwrap();
    assert_eq!(constraints.len(), 1);
    let only_constraint = &constraints[0];
    assert_eq!(only_constraint.columns, vec!["email"]);

    let users_exists = inspector.table_exists("users", None).unwrap();
    let unknown_exists = inspector.table_exists("not_there", None).unwrap();
    assert!(users_exists);
    assert!(!unknown_exists);
}
26805
// On the SQLite test connection the inspector reports a "3."-prefixed server
// version, the dialect name "sqlite" and no identifier-length limit.
#[cfg(feature = "sql-sqlite")]
#[test]
fn sql_inspector_server_version_and_dialect() {
    let conn = make_sql_test_conn();
    let inspector = SqlInspector::new(&conn);

    let server_version = inspector.server_version().unwrap().unwrap();
    assert!(server_version.starts_with("3."));

    assert_eq!(inspector.dialect_name(), "sqlite");
    assert!(inspector.max_identifier_length().is_none());
}
26817
// The SQLite test connection reports no table-level comment.
#[cfg(feature = "sql-sqlite")]
#[test]
fn sql_inspector_table_comment_returns_none_on_sqlite() {
    let conn = make_sql_test_conn();
    let ddl = "CREATE TABLE t (x INTEGER);";
    super::SqlConnection::execute_batch(&conn, ddl).unwrap();

    let comment = SqlInspector::new(&conn).table_comment("t", None).unwrap();
    assert!(comment.is_none());
}
26826
// The free function `inspect` yields an inspector that can list the tables
// of the connection it was given.
#[cfg(feature = "sql-sqlite")]
#[test]
fn sql_inspector_via_inspect_helper() {
    let conn = make_sql_test_conn();
    let ddl = "CREATE TABLE one (x INTEGER);";
    super::SqlConnection::execute_batch(&conn, ddl).unwrap();

    let tables = super::inspect(&conn).tables(None).unwrap();
    assert_eq!(tables, vec!["one"]);
}
26839
// The inspector must pass its `schema` argument through to the backend's
// listing hooks and surface the backend's dialect metadata unchanged.
#[test]
fn sql_inspector_routes_schema_arg_to_backend() {
    // Stub backend: listings are populated only for the "analytics" schema;
    // the mandatory trait methods are inert.
    struct PgLikeStub;

    impl super::SqlConnection for PgLikeStub {
        fn list_tables(&self, schema: Option<&str>) -> Result<Vec<String>, IoError> {
            let tables = if schema == Some("analytics") {
                vec![String::from("events")]
            } else {
                Vec::new()
            };
            Ok(tables)
        }

        fn list_views(&self, schema: Option<&str>) -> Result<Vec<String>, IoError> {
            let views = if schema == Some("analytics") {
                vec![String::from("daily")]
            } else {
                Vec::new()
            };
            Ok(views)
        }

        fn list_schemas(&self) -> Result<Vec<String>, IoError> {
            Ok(["public", "analytics"]
                .into_iter()
                .map(String::from)
                .collect())
        }

        fn dialect_name(&self) -> &'static str {
            "postgresql"
        }

        fn max_identifier_length(&self) -> Option<usize> {
            Some(63)
        }

        fn supports_schemas(&self) -> bool {
            true
        }

        fn query(&self, _sql: &str, _params: &[Scalar]) -> Result<SqlQueryResult, IoError> {
            Ok(SqlQueryResult {
                columns: Vec::new(),
                rows: Vec::new(),
            })
        }

        fn execute_batch(&self, _sql: &str) -> Result<(), IoError> {
            Ok(())
        }

        fn insert_rows(&self, _sql: &str, _rows: &[Vec<Scalar>]) -> Result<(), IoError> {
            Ok(())
        }

        fn table_exists(&self, _name: &str) -> Result<bool, IoError> {
            Ok(false)
        }

        fn dtype_sql(&self, _dtype: DType) -> &'static str {
            "TEXT"
        }

        fn index_dtype_sql(&self, _index: &Index) -> &'static str {
            "TEXT"
        }
    }

    let conn = PgLikeStub;
    let inspector = SqlInspector::new(&conn);

    // Schema-scoped listings.
    let analytics_tables = inspector.tables(Some("analytics")).unwrap();
    assert_eq!(analytics_tables, vec!["events"]);
    let analytics_views = inspector.views(Some("analytics")).unwrap();
    assert_eq!(analytics_views, vec!["daily"]);
    let audit_tables = inspector.tables(Some("audit")).unwrap();
    assert!(audit_tables.is_empty());

    // Connection-wide metadata.
    let schemas = inspector.schemas().unwrap();
    assert_eq!(schemas, vec!["public", "analytics"]);
    assert_eq!(inspector.dialect_name(), "postgresql");
    assert_eq!(inspector.max_identifier_length(), Some(63));
}
26902
// `has_column` is true for each column that the table declares.
#[cfg(feature = "sql-sqlite")]
#[test]
fn sql_inspector_has_column_returns_true_for_present_column() {
    let conn = make_sql_test_conn();
    let ddl = "CREATE TABLE has_col_tbl (id INTEGER, name TEXT);";
    super::SqlConnection::execute_batch(&conn, ddl).unwrap();

    let inspector = SqlInspector::new(&conn);
    for column in ["id", "name"] {
        assert!(inspector.has_column("has_col_tbl", column, None).unwrap());
    }
}
26918
// `has_column` is false for a column that the existing table does not declare.
#[cfg(feature = "sql-sqlite")]
#[test]
fn sql_inspector_has_column_returns_false_for_missing_column() {
    let conn = make_sql_test_conn();
    let ddl = "CREATE TABLE only_id (id INTEGER);";
    super::SqlConnection::execute_batch(&conn, ddl).unwrap();

    let found = SqlInspector::new(&conn)
        .has_column("only_id", "name", None)
        .unwrap();
    assert!(!found);
}
26928
// Asking about a column of a table that does not exist yields `Ok(false)`
// rather than an error.
#[cfg(feature = "sql-sqlite")]
#[test]
fn sql_inspector_has_column_returns_false_for_missing_table() {
    let conn = make_sql_test_conn();
    let found = SqlInspector::new(&conn)
        .has_column("no_such_tbl", "any_col", None)
        .unwrap();
    assert!(!found);
}
26941
// `column` returns the complete schema entry of one column: declared type,
// key ordinal, autoincrement flag, nullability and default value.
#[cfg(feature = "sql-sqlite")]
#[test]
fn sql_inspector_column_returns_full_metadata_for_present_column() {
    let conn = make_sql_test_conn();
    let ddl = "CREATE TABLE detailed (id INTEGER PRIMARY KEY, status TEXT DEFAULT 'active');";
    super::SqlConnection::execute_batch(&conn, ddl).unwrap();

    let inspector = SqlInspector::new(&conn);

    // The INTEGER primary key column.
    let id_column = inspector.column("detailed", "id", None).unwrap().unwrap();
    assert_eq!(id_column.name, "id");
    assert_eq!(id_column.declared_type.as_deref(), Some("INTEGER"));
    assert_eq!(id_column.primary_key_ordinal, Some(0));
    assert!(id_column.autoincrement);

    // The nullable TEXT column; its default is compared as the quoted SQL text.
    let status_column = inspector
        .column("detailed", "status", None)
        .unwrap()
        .unwrap();
    assert_eq!(status_column.declared_type.as_deref(), Some("TEXT"));
    assert!(status_column.nullable);
    assert_eq!(status_column.default_value.as_deref(), Some("'active'"));
    assert!(!status_column.autoincrement);
}
26968
// `column` yields `Ok(None)` both for an unknown column of an existing table
// and for any column of an unknown table.
#[cfg(feature = "sql-sqlite")]
#[test]
fn sql_inspector_column_returns_none_for_missing_column_or_table() {
    let conn = make_sql_test_conn();
    let ddl = "CREATE TABLE only_x (x INTEGER);";
    super::SqlConnection::execute_batch(&conn, ddl).unwrap();

    let inspector = SqlInspector::new(&conn);

    let missing_column = inspector.column("only_x", "missing", None).unwrap();
    assert!(missing_column.is_none());

    let missing_table = inspector.column("no_such", "any", None).unwrap();
    assert!(missing_table.is_none());
}
26985
// `has_column` and `column` must forward the `schema` argument to the
// backend: the stub only knows `users` inside the `public` schema.
#[test]
fn sql_inspector_has_column_routes_schema_arg_to_backend() {
    // Stub backend: `table_schema` answers only for ("users", "public");
    // the mandatory trait methods are inert.
    struct PgLikeColumns;

    impl super::SqlConnection for PgLikeColumns {
        fn table_schema(
            &self,
            table: &str,
            schema: Option<&str>,
        ) -> Result<Option<SqlTableSchema>, IoError> {
            match (table, schema) {
                ("users", Some("public")) => {
                    let id_column = SqlColumnSchema {
                        name: String::from("id"),
                        declared_type: Some(String::from("BIGINT")),
                        nullable: false,
                        default_value: None,
                        primary_key_ordinal: Some(0),
                        comment: None,
                        autoincrement: true,
                    };
                    Ok(Some(SqlTableSchema {
                        table_name: String::from("users"),
                        columns: vec![id_column],
                    }))
                }
                _ => Ok(None),
            }
        }

        fn supports_schemas(&self) -> bool {
            true
        }

        fn query(&self, _sql: &str, _params: &[Scalar]) -> Result<SqlQueryResult, IoError> {
            Ok(SqlQueryResult {
                columns: Vec::new(),
                rows: Vec::new(),
            })
        }

        fn execute_batch(&self, _sql: &str) -> Result<(), IoError> {
            Ok(())
        }

        fn insert_rows(&self, _sql: &str, _rows: &[Vec<Scalar>]) -> Result<(), IoError> {
            Ok(())
        }

        fn table_exists(&self, _name: &str) -> Result<bool, IoError> {
            Ok(false)
        }

        fn dtype_sql(&self, _dtype: DType) -> &'static str {
            "TEXT"
        }

        fn index_dtype_sql(&self, _index: &Index) -> &'static str {
            "TEXT"
        }
    }

    let conn = PgLikeColumns;
    let inspector = SqlInspector::new(&conn);

    // (column, schema, expected `has_column` answer)
    for (column, schema, expected) in [
        ("id", "public", true),
        ("id", "audit", false),
        ("missing", "public", false),
    ] {
        let found = inspector
            .has_column("users", column, Some(schema))
            .unwrap();
        assert_eq!(found, expected);
    }

    let public_id = inspector
        .column("users", "id", Some("public"))
        .unwrap()
        .unwrap();
    assert_eq!(public_id.declared_type.as_deref(), Some("BIGINT"));
    assert!(public_id.autoincrement);

    let audit_id = inspector.column("users", "id", Some("audit")).unwrap();
    assert!(audit_id.is_none());
}
27060
// Reflecting a table that does not exist yields `Ok(None)`.
#[cfg(feature = "sql-sqlite")]
#[test]
fn sql_inspector_reflect_table_unknown_returns_none() {
    let conn = make_sql_test_conn();
    let reflected = SqlInspector::new(&conn)
        .reflect_table("no_such", None)
        .unwrap();
    assert!(reflected.is_none());
}
27071
// `reflect_table` gathers columns, primary key, indexes, unique constraints,
// foreign keys and the table comment of one table into a single bundle.
#[cfg(feature = "sql-sqlite")]
#[test]
fn sql_inspector_reflect_table_bundles_all_metadata() {
    let conn = make_sql_test_conn();
    for ddl in [
        "CREATE TABLE parent (pid INTEGER PRIMARY KEY, code TEXT);",
        "CREATE TABLE bundled ( \
         id INTEGER PRIMARY KEY, \
         parent_id INTEGER, \
         slug TEXT, \
         email TEXT UNIQUE, \
         FOREIGN KEY (parent_id) REFERENCES parent(pid) \
         );",
        "CREATE INDEX idx_bundled_slug ON bundled(slug);",
    ] {
        super::SqlConnection::execute_batch(&conn, ddl).unwrap();
    }

    let reflected = SqlInspector::new(&conn)
        .reflect_table("bundled", None)
        .unwrap()
        .expect("table exists");

    assert_eq!(reflected.table_name, "bundled");

    // Columns, in declaration order.
    let column_names: Vec<&str> = reflected
        .columns
        .iter()
        .map(|column| column.name.as_str())
        .collect();
    assert_eq!(column_names, vec!["id", "parent_id", "slug", "email"]);
    let id_column = reflected
        .columns
        .iter()
        .find(|column| column.name == "id")
        .expect("id col");
    assert!(id_column.autoincrement);

    assert_eq!(reflected.primary_key_columns, vec!["id"]);

    // Exactly one index is expected in the bundle: the explicitly created one.
    assert_eq!(reflected.indexes.len(), 1);
    let only_index = &reflected.indexes[0];
    assert_eq!(only_index.name, "idx_bundled_slug");

    assert_eq!(reflected.unique_constraints.len(), 1);
    let only_unique = &reflected.unique_constraints[0];
    assert_eq!(only_unique.columns, vec!["email"]);

    assert_eq!(reflected.foreign_keys.len(), 1);
    let only_fk = &reflected.foreign_keys[0];
    assert_eq!(only_fk.columns, vec!["parent_id"]);
    assert_eq!(only_fk.referenced_table, "parent");

    assert!(reflected.comment.is_none());
}
27138
/// Verifies that `SqlInspector::reflect_table` assembles its bundle from the
/// connection's reflection hooks and passes the schema argument through.
///
/// The stub reports a table schema and a primary key only for `users` in
/// schema `public`; its index, unique-constraint and comment hooks answer
/// unconditionally. Reflecting `users` under `audit` must therefore yield
/// `None`.
#[test]
fn sql_inspector_reflect_table_routes_to_backend_override() {
    // Schema-aware stub backend that serves canned reflection metadata.
    struct PgLikeBundle;

    impl super::SqlConnection for PgLikeBundle {
        fn query(&self, _sql: &str, _params: &[Scalar]) -> Result<SqlQueryResult, IoError> {
            let empty = SqlQueryResult {
                columns: Vec::new(),
                rows: Vec::new(),
            };
            Ok(empty)
        }
        fn execute_batch(&self, _statements: &str) -> Result<(), IoError> {
            Ok(())
        }
        fn table_exists(&self, _table: &str) -> Result<bool, IoError> {
            Ok(false)
        }
        fn insert_rows(&self, _statement: &str, _batch: &[Vec<Scalar>]) -> Result<(), IoError> {
            Ok(())
        }
        fn dtype_sql(&self, _kind: DType) -> &'static str {
            "TEXT"
        }
        fn index_dtype_sql(&self, _labels: &Index) -> &'static str {
            "TEXT"
        }
        fn supports_schemas(&self) -> bool {
            true
        }
        fn table_schema(
            &self,
            table: &str,
            schema: Option<&str>,
        ) -> Result<Option<SqlTableSchema>, IoError> {
            match (table, schema) {
                ("users", Some("public")) => {
                    let id_column = SqlColumnSchema {
                        name: String::from("id"),
                        declared_type: Some(String::from("BIGINT")),
                        nullable: false,
                        default_value: None,
                        primary_key_ordinal: Some(0),
                        comment: None,
                        autoincrement: true,
                    };
                    Ok(Some(SqlTableSchema {
                        table_name: String::from("users"),
                        columns: vec![id_column],
                    }))
                }
                _ => Ok(None),
            }
        }
        fn primary_key_columns(
            &self,
            table: &str,
            schema: Option<&str>,
        ) -> Result<Vec<String>, IoError> {
            match (table, schema) {
                ("users", Some("public")) => Ok(vec![String::from("id")]),
                _ => Ok(Vec::new()),
            }
        }
        fn list_indexes(
            &self,
            _table: &str,
            _schema: Option<&str>,
        ) -> Result<Vec<SqlIndexSchema>, IoError> {
            let status_index = SqlIndexSchema {
                name: String::from("users_status_idx"),
                columns: vec![String::from("status")],
                unique: false,
            };
            Ok(vec![status_index])
        }
        fn list_foreign_keys(
            &self,
            _table: &str,
            _schema: Option<&str>,
        ) -> Result<Vec<SqlForeignKeySchema>, IoError> {
            Ok(Vec::new())
        }
        fn list_unique_constraints(
            &self,
            _table: &str,
            _schema: Option<&str>,
        ) -> Result<Vec<SqlUniqueConstraintSchema>, IoError> {
            let email_key = SqlUniqueConstraintSchema {
                name: String::from("users_email_key"),
                columns: vec![String::from("email")],
            };
            Ok(vec![email_key])
        }
        fn table_comment(
            &self,
            _table: &str,
            _schema: Option<&str>,
        ) -> Result<Option<String>, IoError> {
            Ok(Some(String::from("Customer accounts")))
        }
    }

    let backend = PgLikeBundle;
    let inspector = SqlInspector::new(&backend);

    // Known (table, schema) pair: every part of the bundle comes from the stub.
    let reflected = inspector
        .reflect_table("users", Some("public"))
        .unwrap()
        .expect("present");
    assert_eq!(reflected.table_name, "users");
    assert_eq!(reflected.columns.len(), 1);
    assert_eq!(reflected.primary_key_columns, ["id"]);
    assert_eq!(reflected.indexes.len(), 1);
    assert_eq!(reflected.indexes[0].name, "users_status_idx");
    assert_eq!(reflected.unique_constraints.len(), 1);
    assert!(reflected.foreign_keys.is_empty());
    assert_eq!(reflected.comment.as_deref(), Some("Customer accounts"));

    // Same table name, different schema: the stub has no schema for it.
    let other_schema = inspector.reflect_table("users", Some("audit")).unwrap();
    assert!(other_schema.is_none());
}
27262
/// Smoke test: a `SqlReflectedTable` can be built field by field and its
/// fields read back directly.
#[test]
fn sql_reflected_table_bundle_smoke_test() {
    let empty_table = SqlReflectedTable {
        table_name: String::from("t"),
        columns: Vec::new(),
        primary_key_columns: Vec::new(),
        indexes: Vec::new(),
        foreign_keys: Vec::new(),
        unique_constraints: Vec::new(),
        comment: None,
    };

    assert!(empty_table.columns.is_empty());
    assert_eq!(empty_table.table_name, "t");
}
27279
/// Exercises the by-name accessors on `SqlReflectedTable` (`column`, `index`,
/// `unique_constraint`) and `foreign_keys_for_column`, checking both the hit
/// and the miss path of each.
#[test]
fn sql_reflected_table_accessors_find_named_entries() {
    let id_column = SqlColumnSchema {
        name: String::from("id"),
        declared_type: Some(String::from("INTEGER")),
        nullable: false,
        default_value: None,
        primary_key_ordinal: Some(0),
        comment: None,
        autoincrement: true,
    };
    let user_id_column = SqlColumnSchema {
        name: String::from("user_id"),
        declared_type: Some(String::from("INTEGER")),
        nullable: false,
        default_value: None,
        primary_key_ordinal: None,
        comment: None,
        autoincrement: false,
    };
    let user_index = SqlIndexSchema {
        name: String::from("idx_orders_user"),
        columns: vec![String::from("user_id")],
        unique: false,
    };
    let user_fk = SqlForeignKeySchema {
        constraint_name: None,
        columns: vec![String::from("user_id")],
        referenced_table: String::from("users"),
        referenced_columns: vec![String::from("id")],
    };
    let id_unique = SqlUniqueConstraintSchema {
        name: String::from("uq_orders_id"),
        columns: vec![String::from("id")],
    };
    let orders = SqlReflectedTable {
        table_name: String::from("orders"),
        columns: vec![id_column, user_id_column],
        primary_key_columns: vec![String::from("id")],
        indexes: vec![user_index],
        foreign_keys: vec![user_fk],
        unique_constraints: vec![id_unique],
        comment: Some(String::from("Customer orders")),
    };

    // Column lookup by name.
    let found_column = orders.column("id").expect("id column");
    assert!(found_column.autoincrement);
    assert_eq!(found_column.declared_type.as_deref(), Some("INTEGER"));
    assert!(orders.column("missing").is_none());

    // Index lookup by name.
    let found_index = orders.index("idx_orders_user").expect("idx");
    assert_eq!(found_index.columns, ["user_id"]);
    assert!(orders.index("idx_does_not_exist").is_none());

    // Unique-constraint lookup by name.
    let found_unique = orders.unique_constraint("uq_orders_id").expect("uq");
    assert_eq!(found_unique.columns, ["id"]);
    assert!(orders.unique_constraint("uq_missing").is_none());

    // Foreign keys are matched on the local column: `id` appears only as a
    // referenced column in this fixture, so it must not match.
    let user_fks = orders.foreign_keys_for_column("user_id");
    assert_eq!(user_fks.len(), 1);
    assert_eq!(user_fks[0].referenced_table, "users");
    assert!(orders.foreign_keys_for_column("id").is_empty());
    assert!(orders.foreign_keys_for_column("nonexistent").is_empty());
}
27349
/// A composite foreign key must be returned for each of its local columns,
/// and must not be returned for a name that only appears among its
/// referenced columns.
#[test]
fn sql_reflected_table_foreign_keys_for_column_handles_composite_fks() {
    let composite_fk = SqlForeignKeySchema {
        constraint_name: None,
        columns: vec![String::from("fyear"), String::from("fmonth")],
        referenced_table: String::from("rolling"),
        referenced_columns: vec![String::from("year"), String::from("month")],
    };
    let fact_table = SqlReflectedTable {
        table_name: String::from("rolling_fact"),
        columns: Vec::new(),
        primary_key_columns: Vec::new(),
        indexes: Vec::new(),
        foreign_keys: vec![composite_fk],
        unique_constraints: Vec::new(),
        comment: None,
    };

    for local_column in ["fyear", "fmonth"] {
        assert_eq!(fact_table.foreign_keys_for_column(local_column).len(), 1);
    }
    assert!(fact_table.foreign_keys_for_column("year").is_empty());
}
27372
/// When two foreign keys share the same local column, both must come back
/// from `foreign_keys_for_column`, in the order they were declared.
#[test]
fn sql_reflected_table_foreign_keys_for_column_returns_multiple_when_relevant() {
    let to_users = SqlForeignKeySchema {
        constraint_name: Some(String::from("fk_audit_a")),
        columns: vec![String::from("entity_id")],
        referenced_table: String::from("users"),
        referenced_columns: vec![String::from("id")],
    };
    let to_products = SqlForeignKeySchema {
        constraint_name: Some(String::from("fk_audit_b")),
        columns: vec![String::from("entity_id")],
        referenced_table: String::from("products"),
        referenced_columns: vec![String::from("id")],
    };
    let audit = SqlReflectedTable {
        table_name: String::from("audit"),
        columns: Vec::new(),
        primary_key_columns: Vec::new(),
        indexes: Vec::new(),
        foreign_keys: vec![to_users, to_products],
        unique_constraints: Vec::new(),
        comment: None,
    };

    let matches = audit.foreign_keys_for_column("entity_id");
    // Comparing the whole name list checks both the count and the order.
    let constraint_names: Vec<Option<&str>> = matches
        .iter()
        .map(|fk| fk.constraint_name.as_deref())
        .collect();
    assert_eq!(constraint_names, [Some("fk_audit_a"), Some("fk_audit_b")]);
}
27405
/// `indexes_for_column` must return every index that mentions the column,
/// whether the column is the first, a middle, or the last member of the index.
#[test]
fn sql_reflected_table_indexes_for_column_matches_any_position() {
    let year_only = SqlIndexSchema {
        name: String::from("idx_rolling_year"),
        columns: vec![String::from("year")],
        unique: false,
    };
    let year_month_code = SqlIndexSchema {
        name: String::from("idx_rolling_y_m_c"),
        columns: vec![
            String::from("year"),
            String::from("month"),
            String::from("code"),
        ],
        unique: false,
    };
    let rolling = SqlReflectedTable {
        table_name: String::from("rolling"),
        columns: Vec::new(),
        primary_key_columns: Vec::new(),
        indexes: vec![year_only, year_month_code],
        foreign_keys: Vec::new(),
        unique_constraints: Vec::new(),
        comment: None,
    };

    // `year` is in both indexes.
    assert_eq!(rolling.indexes_for_column("year").len(), 2);

    // `month` sits in the middle of the composite index only.
    let on_month = rolling.indexes_for_column("month");
    assert_eq!(on_month.len(), 1);
    assert_eq!(on_month[0].name, "idx_rolling_y_m_c");

    // `code` is the trailing member of the composite index.
    assert_eq!(rolling.indexes_for_column("code").len(), 1);

    assert!(rolling.indexes_for_column("nonexistent").is_empty());
}
27448
/// `unique_constraints_for_column` must match a column at any position
/// inside a (possibly composite) unique constraint.
#[test]
fn sql_reflected_table_unique_constraints_for_column_matches_any_position() {
    let email_unique = SqlUniqueConstraintSchema {
        name: String::from("uq_events_email"),
        columns: vec![String::from("email")],
    };
    let composite_unique = SqlUniqueConstraintSchema {
        name: String::from("uq_events_user_event_ts"),
        columns: vec![
            String::from("user_id"),
            String::from("event_id"),
            String::from("ts"),
        ],
    };
    let events = SqlReflectedTable {
        table_name: String::from("events"),
        columns: Vec::new(),
        primary_key_columns: Vec::new(),
        indexes: Vec::new(),
        foreign_keys: Vec::new(),
        unique_constraints: vec![email_unique, composite_unique],
        comment: None,
    };

    // Single-column constraint.
    let on_email = events.unique_constraints_for_column("email");
    assert_eq!(on_email.len(), 1);
    assert_eq!(on_email[0].name, "uq_events_email");

    // Middle member of the composite constraint.
    let on_event_id = events.unique_constraints_for_column("event_id");
    assert_eq!(on_event_id.len(), 1);
    assert_eq!(on_event_id[0].columns, ["user_id", "event_id", "ts"]);

    // Trailing member of the composite constraint.
    assert_eq!(events.unique_constraints_for_column("ts").len(), 1);

    let unknown = events.unique_constraints_for_column("nonexistent");
    assert!(unknown.is_empty());
}
27489
/// When several indexes or unique constraints mention the same column, the
/// `*_for_column` accessors must return all of them in declaration order.
#[test]
fn sql_reflected_table_for_column_accessors_return_multiple() {
    let indexes = vec![
        SqlIndexSchema {
            name: String::from("idx_a"),
            columns: vec![String::from("x")],
            unique: false,
        },
        SqlIndexSchema {
            name: String::from("idx_b"),
            columns: vec![String::from("x"), String::from("y")],
            unique: true,
        },
    ];
    let unique_constraints = vec![
        SqlUniqueConstraintSchema {
            name: String::from("uq_a"),
            columns: vec![String::from("x")],
        },
        SqlUniqueConstraintSchema {
            name: String::from("uq_b"),
            columns: vec![String::from("x"), String::from("z")],
        },
    ];
    let wide = SqlReflectedTable {
        table_name: String::from("wide"),
        columns: Vec::new(),
        primary_key_columns: Vec::new(),
        indexes,
        foreign_keys: Vec::new(),
        unique_constraints,
        comment: None,
    };

    // Comparing the full name lists checks both the count and the order.
    let index_hits = wide.indexes_for_column("x");
    let index_names: Vec<&str> = index_hits.iter().map(|ix| ix.name.as_str()).collect();
    assert_eq!(index_names, ["idx_a", "idx_b"]);

    let unique_hits = wide.unique_constraints_for_column("x");
    let unique_names: Vec<&str> = unique_hits.iter().map(|uq| uq.name.as_str()).collect();
    assert_eq!(unique_names, ["uq_a", "uq_b"]);
}
27533
/// With no tables created on the test connection, `reflect_all_tables` must
/// succeed and return no bundles.
#[cfg(feature = "sql-sqlite")]
#[test]
fn sql_inspector_reflect_all_tables_empty_db() {
    let conn = make_sql_test_conn();
    let reflected = SqlInspector::new(&conn).reflect_all_tables(None).unwrap();
    assert!(reflected.is_empty());
}
27544
/// After creating two tables, `reflect_all_tables` must return one bundle
/// per table (`alpha` before `beta`), each carrying that table's own columns
/// and primary-key information.
#[cfg(feature = "sql-sqlite")]
#[test]
fn sql_inspector_reflect_all_tables_returns_one_bundle_per_table() {
    let conn = make_sql_test_conn();
    for ddl in [
        "CREATE TABLE alpha (id INTEGER PRIMARY KEY, name TEXT);",
        "CREATE TABLE beta (uid INTEGER, label TEXT);",
    ] {
        super::SqlConnection::execute_batch(&conn, ddl).unwrap();
    }

    let inspector = SqlInspector::new(&conn);
    let reflected = inspector.reflect_all_tables(None).unwrap();
    assert_eq!(reflected.len(), 2);
    let (alpha, beta) = (&reflected[0], &reflected[1]);

    assert_eq!(alpha.table_name, "alpha");
    assert_eq!(beta.table_name, "beta");

    let alpha_columns: Vec<&str> = alpha.columns.iter().map(|c| c.name.as_str()).collect();
    assert_eq!(alpha_columns, ["id", "name"]);
    assert_eq!(alpha.primary_key_columns, ["id"]);

    // `beta` declares no primary key.
    assert_eq!(beta.columns.len(), 2);
    assert!(beta.primary_key_columns.is_empty());
}
27575
/// A table that is listed but has no schema must be skipped, not reported as
/// an error.
///
/// The stub's `list_tables` advertises `a` and `b`, but `table_schema` only
/// knows `a`; `reflect_all_tables` must succeed and return just the bundle
/// for `a`.
#[test]
fn sql_inspector_reflect_all_tables_skips_disappearing_tables() {
    // Stub whose table listing and per-table schema lookup disagree.
    struct DisappearingTable;

    impl super::SqlConnection for DisappearingTable {
        fn query(&self, _sql: &str, _params: &[Scalar]) -> Result<SqlQueryResult, IoError> {
            let empty = SqlQueryResult {
                columns: Vec::new(),
                rows: Vec::new(),
            };
            Ok(empty)
        }
        fn execute_batch(&self, _statements: &str) -> Result<(), IoError> {
            Ok(())
        }
        fn table_exists(&self, _table: &str) -> Result<bool, IoError> {
            Ok(false)
        }
        fn insert_rows(&self, _statement: &str, _batch: &[Vec<Scalar>]) -> Result<(), IoError> {
            Ok(())
        }
        fn dtype_sql(&self, _kind: DType) -> &'static str {
            "TEXT"
        }
        fn index_dtype_sql(&self, _labels: &Index) -> &'static str {
            "TEXT"
        }
        fn list_tables(&self, _schema: Option<&str>) -> Result<Vec<String>, IoError> {
            Ok(vec![String::from("a"), String::from("b")])
        }
        fn table_schema(
            &self,
            table: &str,
            _schema: Option<&str>,
        ) -> Result<Option<SqlTableSchema>, IoError> {
            // Only `a` has a schema; `b` is listed but yields `None` here.
            if table != "a" {
                return Ok(None);
            }
            let x_column = SqlColumnSchema {
                name: String::from("x"),
                declared_type: Some(String::from("INTEGER")),
                nullable: true,
                default_value: None,
                primary_key_ordinal: None,
                comment: None,
                autoincrement: false,
            };
            Ok(Some(SqlTableSchema {
                table_name: String::from("a"),
                columns: vec![x_column],
            }))
        }
    }

    let backend = DisappearingTable;
    let reflected = SqlInspector::new(&backend)
        .reflect_all_tables(None)
        .unwrap();
    assert_eq!(reflected.len(), 1);
    assert_eq!(reflected[0].table_name, "a");
}
27638
/// `reflect_all_tables` must forward its schema argument to both the table
/// listing and the per-table schema lookup.
///
/// The stub only knows `events` in schema `analytics`; asking for `audit`
/// must return an empty list.
#[test]
fn sql_inspector_reflect_all_tables_routes_schema_arg() {
    // Schema-aware stub with a single table in a single schema.
    struct MultiSchemaReflect;

    impl super::SqlConnection for MultiSchemaReflect {
        fn query(&self, _sql: &str, _params: &[Scalar]) -> Result<SqlQueryResult, IoError> {
            let empty = SqlQueryResult {
                columns: Vec::new(),
                rows: Vec::new(),
            };
            Ok(empty)
        }
        fn execute_batch(&self, _statements: &str) -> Result<(), IoError> {
            Ok(())
        }
        fn table_exists(&self, _table: &str) -> Result<bool, IoError> {
            Ok(false)
        }
        fn insert_rows(&self, _statement: &str, _batch: &[Vec<Scalar>]) -> Result<(), IoError> {
            Ok(())
        }
        fn dtype_sql(&self, _kind: DType) -> &'static str {
            "TEXT"
        }
        fn index_dtype_sql(&self, _labels: &Index) -> &'static str {
            "TEXT"
        }
        fn supports_schemas(&self) -> bool {
            true
        }
        fn list_tables(&self, schema: Option<&str>) -> Result<Vec<String>, IoError> {
            if schema == Some("analytics") {
                Ok(vec![String::from("events")])
            } else {
                Ok(Vec::new())
            }
        }
        fn table_schema(
            &self,
            table: &str,
            schema: Option<&str>,
        ) -> Result<Option<SqlTableSchema>, IoError> {
            match (table, schema) {
                ("events", Some("analytics")) => {
                    let ts_column = SqlColumnSchema {
                        name: String::from("ts"),
                        declared_type: Some(String::from("TIMESTAMPTZ")),
                        nullable: false,
                        default_value: None,
                        primary_key_ordinal: None,
                        comment: None,
                        autoincrement: false,
                    };
                    Ok(Some(SqlTableSchema {
                        table_name: String::from("events"),
                        columns: vec![ts_column],
                    }))
                }
                _ => Ok(None),
            }
        }
    }

    let backend = MultiSchemaReflect;
    let inspector = SqlInspector::new(&backend);

    let analytics = inspector.reflect_all_tables(Some("analytics")).unwrap();
    assert_eq!(analytics.len(), 1);
    let events = &analytics[0];
    assert_eq!(events.table_name, "events");
    assert_eq!(events.columns[0].declared_type.as_deref(), Some("TIMESTAMPTZ"));

    // A schema the stub has no tables for reflects as empty.
    let audit = inspector.reflect_all_tables(Some("audit")).unwrap();
    assert!(audit.is_empty());
}
27715
/// With no views created on the test connection, `reflect_all_views` must
/// succeed and return no bundles.
#[cfg(feature = "sql-sqlite")]
#[test]
fn sql_inspector_reflect_all_views_empty_db() {
    let conn = make_sql_test_conn();
    let reflected = SqlInspector::new(&conn).reflect_all_views(None).unwrap();
    assert!(reflected.is_empty());
}
27726
/// After creating two views over one base table, `reflect_all_views` must
/// return one bundle per view (`alpha_view` before `zebra_view`) with that
/// view's projected columns and no key, index or constraint metadata.
#[cfg(feature = "sql-sqlite")]
#[test]
fn sql_inspector_reflect_all_views_returns_one_bundle_per_view() {
    let conn = make_sql_test_conn();
    for ddl in [
        "CREATE TABLE base (id INTEGER, label TEXT);",
        "CREATE VIEW alpha_view AS SELECT id FROM base;",
        "CREATE VIEW zebra_view AS SELECT label FROM base;",
    ] {
        super::SqlConnection::execute_batch(&conn, ddl).unwrap();
    }

    let inspector = SqlInspector::new(&conn);
    let views = inspector.reflect_all_views(None).unwrap();
    assert_eq!(views.len(), 2);
    let (alpha, zebra) = (&views[0], &views[1]);

    assert_eq!(alpha.table_name, "alpha_view");
    assert_eq!(zebra.table_name, "zebra_view");

    let alpha_columns: Vec<&str> = alpha.columns.iter().map(|c| c.name.as_str()).collect();
    assert_eq!(alpha_columns, ["id"]);
    let zebra_columns: Vec<&str> = zebra.columns.iter().map(|c| c.name.as_str()).collect();
    assert_eq!(zebra_columns, ["label"]);

    // The views reflect with no keys, indexes or constraints.
    for view in views.iter() {
        assert!(view.primary_key_columns.is_empty());
        assert!(view.indexes.is_empty());
        assert!(view.foreign_keys.is_empty());
        assert!(view.unique_constraints.is_empty());
    }
}
27774
/// `reflect_all_views` must forward its schema argument to both the view
/// listing and the per-view schema lookup.
///
/// The stub only knows `weekly_summary` in schema `reporting`; asking for
/// `audit` must return an empty list.
#[test]
fn sql_inspector_reflect_all_views_routes_schema_arg() {
    // Schema-aware stub with a single view in a single schema.
    struct MultiSchemaViewReflect;

    impl super::SqlConnection for MultiSchemaViewReflect {
        fn query(&self, _sql: &str, _params: &[Scalar]) -> Result<SqlQueryResult, IoError> {
            let empty = SqlQueryResult {
                columns: Vec::new(),
                rows: Vec::new(),
            };
            Ok(empty)
        }
        fn execute_batch(&self, _statements: &str) -> Result<(), IoError> {
            Ok(())
        }
        fn table_exists(&self, _table: &str) -> Result<bool, IoError> {
            Ok(false)
        }
        fn insert_rows(&self, _statement: &str, _batch: &[Vec<Scalar>]) -> Result<(), IoError> {
            Ok(())
        }
        fn dtype_sql(&self, _kind: DType) -> &'static str {
            "TEXT"
        }
        fn index_dtype_sql(&self, _labels: &Index) -> &'static str {
            "TEXT"
        }
        fn supports_schemas(&self) -> bool {
            true
        }
        fn list_views(&self, schema: Option<&str>) -> Result<Vec<String>, IoError> {
            if schema == Some("reporting") {
                Ok(vec![String::from("weekly_summary")])
            } else {
                Ok(Vec::new())
            }
        }
        fn table_schema(
            &self,
            table: &str,
            schema: Option<&str>,
        ) -> Result<Option<SqlTableSchema>, IoError> {
            match (table, schema) {
                ("weekly_summary", Some("reporting")) => {
                    let week_column = SqlColumnSchema {
                        name: String::from("week"),
                        declared_type: Some(String::from("DATE")),
                        nullable: true,
                        default_value: None,
                        primary_key_ordinal: None,
                        comment: None,
                        autoincrement: false,
                    };
                    Ok(Some(SqlTableSchema {
                        table_name: String::from("weekly_summary"),
                        columns: vec![week_column],
                    }))
                }
                _ => Ok(None),
            }
        }
    }

    let backend = MultiSchemaViewReflect;
    let inspector = SqlInspector::new(&backend);

    let reporting = inspector.reflect_all_views(Some("reporting")).unwrap();
    assert_eq!(reporting.len(), 1);
    let summary = &reporting[0];
    assert_eq!(summary.table_name, "weekly_summary");
    assert_eq!(summary.columns[0].declared_type.as_deref(), Some("DATE"));

    // A schema the stub has no views for reflects as empty.
    let audit = inspector.reflect_all_views(Some("audit")).unwrap();
    assert!(audit.is_empty());
}
27847
/// Guards against redundant schema lookups: one `reflect_table` call must
/// invoke the connection's `table_schema` exactly once, and the bundle must
/// still report `id` as the primary key.
///
/// The stub does not override `primary_key_columns`, so the primary key is
/// presumably derived from the already-fetched schema — confirm against the
/// trait's default implementation.
#[test]
fn sql_inspector_reflect_table_calls_table_schema_only_once() {
    use std::cell::Cell;

    // Stub that counts how many times `table_schema` is called.
    struct CountingTableSchema {
        table_schema_calls: Cell<usize>,
    }

    impl super::SqlConnection for CountingTableSchema {
        fn query(&self, _sql: &str, _params: &[Scalar]) -> Result<SqlQueryResult, IoError> {
            let empty = SqlQueryResult {
                columns: Vec::new(),
                rows: Vec::new(),
            };
            Ok(empty)
        }
        fn execute_batch(&self, _statements: &str) -> Result<(), IoError> {
            Ok(())
        }
        fn table_exists(&self, _table: &str) -> Result<bool, IoError> {
            Ok(false)
        }
        fn insert_rows(&self, _statement: &str, _batch: &[Vec<Scalar>]) -> Result<(), IoError> {
            Ok(())
        }
        fn dtype_sql(&self, _kind: DType) -> &'static str {
            "TEXT"
        }
        fn index_dtype_sql(&self, _labels: &Index) -> &'static str {
            "TEXT"
        }
        fn table_schema(
            &self,
            _table: &str,
            _schema: Option<&str>,
        ) -> Result<Option<SqlTableSchema>, IoError> {
            // Record the call before answering with a one-column schema.
            let calls_so_far = self.table_schema_calls.get();
            self.table_schema_calls.set(calls_so_far + 1);

            let id_column = SqlColumnSchema {
                name: String::from("id"),
                declared_type: Some(String::from("BIGINT")),
                nullable: false,
                default_value: None,
                primary_key_ordinal: Some(0),
                comment: None,
                autoincrement: true,
            };
            Ok(Some(SqlTableSchema {
                table_name: String::from("x"),
                columns: vec![id_column],
            }))
        }
    }

    let backend = CountingTableSchema {
        table_schema_calls: Cell::new(0),
    };
    let inspector = SqlInspector::new(&backend);
    let reflected = inspector.reflect_table("x", None).unwrap().unwrap();

    assert_eq!(backend.table_schema_calls.get(), 1);
    assert_eq!(reflected.primary_key_columns, ["id"]);
}
27912
/// Pins the defaults of `SqlReadOptions`: `coerce_float` is on, and every
/// optional setting checked here starts out unset.
#[test]
fn sql_read_options_default_coerce_float_matches_pandas() {
    let defaults = SqlReadOptions::default();

    assert!(
        defaults.coerce_float,
        "default coerce_float must be true (pandas parity)"
    );

    // All optional knobs default to `None`.
    assert!(defaults.params.is_none());
    assert!(defaults.parse_dates.is_none());
    assert!(defaults.dtype.is_none());
    assert!(defaults.schema.is_none());
    assert!(defaults.columns.is_none());
    assert!(defaults.index_col.is_none());
}
27933
27934 #[derive(Default)]
27945 struct AnsiSchemaConn;
27946 impl super::SqlConnection for AnsiSchemaConn {
27947 fn query(&self, _q: &str, _p: &[Scalar]) -> Result<super::SqlQueryResult, IoError> {
27948 Ok(super::SqlQueryResult {
27949 columns: vec![],
27950 rows: vec![],
27951 })
27952 }
27953 fn execute_batch(&self, _sql: &str) -> Result<(), IoError> {
27954 Ok(())
27955 }
27956 fn table_exists(&self, _n: &str) -> Result<bool, IoError> {
27957 Ok(false)
27958 }
27959 fn insert_rows(&self, _s: &str, _r: &[Vec<Scalar>]) -> Result<(), IoError> {
27960 Ok(())
27961 }
27962 fn dtype_sql(&self, _d: DType) -> &'static str {
27963 "TEXT"
27964 }
27965 fn index_dtype_sql(&self, _i: &Index) -> &'static str {
27966 "TEXT"
27967 }
27968 fn supports_schemas(&self) -> bool {
27969 true
27970 }
27971 fn parameter_marker(&self, ordinal: usize) -> String {
27972 format!("${ordinal}")
27973 }
27974 fn max_identifier_length(&self) -> Option<usize> {
27975 Some(63)
27976 }
27977 }
27978
27979 #[derive(Default)]
27980 struct MysqlBacktickConn;
27981 impl super::SqlConnection for MysqlBacktickConn {
27982 fn query(&self, _q: &str, _p: &[Scalar]) -> Result<super::SqlQueryResult, IoError> {
27983 Ok(super::SqlQueryResult {
27984 columns: vec![],
27985 rows: vec![],
27986 })
27987 }
27988 fn execute_batch(&self, _sql: &str) -> Result<(), IoError> {
27989 Ok(())
27990 }
27991 fn table_exists(&self, _n: &str) -> Result<bool, IoError> {
27992 Ok(false)
27993 }
27994 fn insert_rows(&self, _s: &str, _r: &[Vec<Scalar>]) -> Result<(), IoError> {
27995 Ok(())
27996 }
27997 fn dtype_sql(&self, _d: DType) -> &'static str {
27998 "TEXT"
27999 }
28000 fn index_dtype_sql(&self, _i: &Index) -> &'static str {
28001 "TEXT"
28002 }
28003 fn supports_schemas(&self) -> bool {
28004 true
28005 }
28006 fn parameter_marker(&self, _ordinal: usize) -> String {
28007 "?".to_owned()
28008 }
28009 fn max_identifier_length(&self) -> Option<usize> {
28010 Some(64)
28011 }
28012 fn quote_identifier(&self, ident: &str) -> Result<String, IoError> {
28013 if ident.contains('\0') {
28014 return Err(IoError::Sql("invalid SQL identifier: NUL byte".to_owned()));
28015 }
28016 Ok(format!("`{}`", ident.replace('`', "``")))
28017 }
28018 }
28019
28020 #[derive(Default)]
28021 struct MssqlBracketConn;
28022 impl super::SqlConnection for MssqlBracketConn {
28023 fn query(&self, _q: &str, _p: &[Scalar]) -> Result<super::SqlQueryResult, IoError> {
28024 Ok(super::SqlQueryResult {
28025 columns: vec![],
28026 rows: vec![],
28027 })
28028 }
28029 fn execute_batch(&self, _sql: &str) -> Result<(), IoError> {
28030 Ok(())
28031 }
28032 fn table_exists(&self, _n: &str) -> Result<bool, IoError> {
28033 Ok(false)
28034 }
28035 fn insert_rows(&self, _s: &str, _r: &[Vec<Scalar>]) -> Result<(), IoError> {
28036 Ok(())
28037 }
28038 fn dtype_sql(&self, _d: DType) -> &'static str {
28039 "NVARCHAR(MAX)"
28040 }
28041 fn index_dtype_sql(&self, _i: &Index) -> &'static str {
28042 "NVARCHAR(MAX)"
28043 }
28044 fn supports_schemas(&self) -> bool {
28045 true
28046 }
28047 fn parameter_marker(&self, ordinal: usize) -> String {
28048 format!("@p{ordinal}")
28049 }
28050 fn max_identifier_length(&self) -> Option<usize> {
28051 Some(128)
28052 }
28053 fn quote_identifier(&self, ident: &str) -> Result<String, IoError> {
28054 if ident.contains('\0') {
28055 return Err(IoError::Sql("invalid SQL identifier: NUL byte".to_owned()));
28056 }
28057 Ok(format!("[{}]", ident.replace(']', "]]")))
28059 }
28060 }
28061
/// SQL keywords used as table names by the `fd90_12_quoting_matrix_*` tests
/// to check that a keyword-named identifier is quoted by each stub dialect.
const FD90_12_RESERVED_WORDS: &[&str] = &[
    "select", "from", "where", "order", "group", "table", "index", "join",
];
28068
/// For every keyword in `FD90_12_RESERVED_WORDS`, `sql_select_all_query` must
/// quote the table name in each stub dialect's style: double quotes,
/// backticks, or square brackets.
#[test]
fn fd90_12_quoting_matrix_select_all_reserved_words_quoted_per_dialect() {
    let (ansi, mysql, mssql) = (AnsiSchemaConn, MysqlBacktickConn, MssqlBracketConn);

    for word in FD90_12_RESERVED_WORDS {
        let ansi_sql = super::sql_select_all_query(&ansi, word).expect("ansi select");
        assert_eq!(
            ansi_sql,
            format!("SELECT * FROM \"{word}\""),
            "ansi reserved word `{word}`"
        );

        let mysql_sql = super::sql_select_all_query(&mysql, word).expect("mysql select");
        assert_eq!(
            mysql_sql,
            format!("SELECT * FROM `{word}`"),
            "mysql reserved word `{word}`"
        );

        let mssql_sql = super::sql_select_all_query(&mssql, word).expect("mssql select");
        assert_eq!(
            mssql_sql,
            format!("SELECT * FROM [{word}]"),
            "mssql reserved word `{word}`"
        );
    }
}
28092
/// Mixed-case column names must pass through `sql_select_columns_query`
/// with their case unchanged, wrapped in each stub dialect's quote style.
#[test]
fn fd90_12_quoting_matrix_select_columns_mixed_case_preserved_per_dialect() {
    let (ansi, mysql, mssql) = (AnsiSchemaConn, MysqlBacktickConn, MssqlBracketConn);
    let cases: &[&str] = &["MyCol", "MIXEDcase", "camelCase", "SCREAMING_SNAKE"];

    for col in cases {
        let ansi_sql = super::sql_select_columns_query(&ansi, "users", &[col]).expect("ansi cols");
        assert_eq!(
            ansi_sql,
            format!("SELECT \"{col}\" FROM \"users\""),
            "ansi mixed-case col `{col}`"
        );

        let mysql_sql =
            super::sql_select_columns_query(&mysql, "users", &[col]).expect("mysql cols");
        assert_eq!(
            mysql_sql,
            format!("SELECT `{col}` FROM `users`"),
            "mysql mixed-case col `{col}`"
        );

        let mssql_sql =
            super::sql_select_columns_query(&mssql, "users", &[col]).expect("mssql cols");
        assert_eq!(
            mssql_sql,
            format!("SELECT [{col}] FROM [users]"),
            "mssql mixed-case col `{col}`"
        );
    }
}
28117
/// Column names that start with a digit (or consist only of digits) must be
/// quoted, not rejected, by `sql_select_columns_query` in every stub dialect.
#[test]
fn fd90_12_quoting_matrix_leading_digit_identifiers_quoted_per_dialect() {
    let (ansi, mysql, mssql) = (AnsiSchemaConn, MysqlBacktickConn, MssqlBracketConn);
    let cases: &[&str] = &["1col", "2nd_place", "9lives", "123"];

    for col in cases {
        let ansi_sql = super::sql_select_columns_query(&ansi, "tbl", &[col]).expect("ansi");
        assert_eq!(ansi_sql, format!("SELECT \"{col}\" FROM \"tbl\""));

        let mysql_sql = super::sql_select_columns_query(&mysql, "tbl", &[col]).expect("mysql");
        assert_eq!(mysql_sql, format!("SELECT `{col}` FROM `tbl`"));

        let mssql_sql = super::sql_select_columns_query(&mssql, "tbl", &[col]).expect("mssql");
        assert_eq!(mssql_sql, format!("SELECT [{col}] FROM [tbl]"));
    }
}
28139
/// `sql_select_all_query_in_schema` must quote the schema and the table
/// separately and join them with a dot, in each stub dialect's quote style.
#[test]
fn fd90_12_quoting_matrix_schema_qualified_select_per_dialect() {
    let (ansi, mysql, mssql) = (AnsiSchemaConn, MysqlBacktickConn, MssqlBracketConn);

    let ansi_sql =
        super::sql_select_all_query_in_schema(&ansi, "users", Some("analytics")).expect("ansi");
    assert_eq!(ansi_sql, "SELECT * FROM \"analytics\".\"users\"");

    let mysql_sql =
        super::sql_select_all_query_in_schema(&mysql, "users", Some("analytics")).expect("mysql");
    assert_eq!(mysql_sql, "SELECT * FROM `analytics`.`users`");

    let mssql_sql =
        super::sql_select_all_query_in_schema(&mssql, "users", Some("dbo")).expect("mssql");
    assert_eq!(mssql_sql, "SELECT * FROM [dbo].[users]");
}
28159
/// `sql_column_definition` plus `sql_create_table_query_in_schema` must emit
/// a schema-qualified `CREATE TABLE IF NOT EXISTS` statement in which the
/// schema, the table and every column (including the keyword `select`) use
/// the stub dialect's quote style.
#[test]
fn fd90_12_quoting_matrix_create_table_per_dialect() {
    let (ansi, mysql, mssql) = (AnsiSchemaConn, MysqlBacktickConn, MssqlBracketConn);

    // Double-quote dialect.
    let ansi_cols = vec![
        super::sql_column_definition(&ansi, "id", "BIGINT").expect("ansi col"),
        super::sql_column_definition(&ansi, "select", "TEXT").expect("ansi reserved col"),
    ];
    let ansi_sql =
        super::sql_create_table_query_in_schema(&ansi, "events", Some("public"), &ansi_cols)
            .expect("ansi create");
    assert_eq!(
        ansi_sql,
        "CREATE TABLE IF NOT EXISTS \"public\".\"events\" (\"id\" BIGINT, \"select\" TEXT)"
    );

    // Backtick dialect.
    let mysql_cols = vec![
        super::sql_column_definition(&mysql, "id", "BIGINT").expect("mysql col"),
        super::sql_column_definition(&mysql, "select", "TEXT").expect("mysql reserved col"),
    ];
    let mysql_sql =
        super::sql_create_table_query_in_schema(&mysql, "events", Some("analytics"), &mysql_cols)
            .expect("mysql create");
    assert_eq!(
        mysql_sql,
        "CREATE TABLE IF NOT EXISTS `analytics`.`events` (`id` BIGINT, `select` TEXT)"
    );

    // Square-bracket dialect.
    let mssql_cols = vec![
        super::sql_column_definition(&mssql, "id", "BIGINT").expect("mssql col"),
        super::sql_column_definition(&mssql, "select", "NVARCHAR(MAX)")
            .expect("mssql reserved col"),
    ];
    let mssql_sql =
        super::sql_create_table_query_in_schema(&mssql, "events", Some("dbo"), &mssql_cols)
            .expect("mssql create");
    assert_eq!(
        mssql_sql,
        "CREATE TABLE IF NOT EXISTS [dbo].[events] ([id] BIGINT, [select] NVARCHAR(MAX))"
    );
}
28199
/// `sql_insert_rows_query_in_schema` must combine each stub dialect's
/// identifier quoting with its parameter-marker style (`$N`, `?`, `@pN`).
#[test]
fn fd90_12_quoting_matrix_insert_per_dialect_with_param_markers() {
    let (ansi, mysql, mssql) = (AnsiSchemaConn, MysqlBacktickConn, MssqlBracketConn);
    let cols = vec![
        String::from("id"),
        String::from("MixedCase"),
        String::from("select"),
    ];

    let ansi_sql = super::sql_insert_rows_query_in_schema(&ansi, "events", Some("public"), &cols)
        .expect("ansi insert");
    assert_eq!(
        ansi_sql,
        "INSERT INTO \"public\".\"events\" (\"id\", \"MixedCase\", \"select\") VALUES ($1, $2, $3)"
    );

    let mysql_sql =
        super::sql_insert_rows_query_in_schema(&mysql, "events", Some("analytics"), &cols)
            .expect("mysql insert");
    assert_eq!(
        mysql_sql,
        "INSERT INTO `analytics`.`events` (`id`, `MixedCase`, `select`) VALUES (?, ?, ?)"
    );

    let mssql_sql = super::sql_insert_rows_query_in_schema(&mssql, "events", Some("dbo"), &cols)
        .expect("mssql insert");
    assert_eq!(
        mssql_sql,
        "INSERT INTO [dbo].[events] ([id], [MixedCase], [select]) VALUES (@p1, @p2, @p3)"
    );
}
28222
/// In a multi-row insert, numbered parameter markers must keep counting
/// across rows (`$1, $2` then `$3, $4`), while the positional `?` marker
/// simply repeats.
#[test]
fn fd90_12_quoting_matrix_multi_row_insert_param_ordinals_span_rows() {
    let (ansi, mysql) = (AnsiSchemaConn, MysqlBacktickConn);
    let cols = vec![String::from("a"), String::from("b")];

    let ansi_sql = super::sql_multi_row_insert_query_in_schema(&ansi, "tbl", None, &cols, 2)
        .expect("ansi multi");
    assert_eq!(
        ansi_sql,
        "INSERT INTO \"tbl\" (\"a\", \"b\") VALUES ($1, $2), ($3, $4)"
    );

    let mysql_sql = super::sql_multi_row_insert_query_in_schema(&mysql, "tbl", None, &cols, 2)
        .expect("mysql multi");
    assert_eq!(
        mysql_sql,
        "INSERT INTO `tbl` (`a`, `b`) VALUES (?, ?), (?, ?)"
    );
}
28239
/// `DROP TABLE IF EXISTS` must schema-qualify the table and quote both parts
/// with each dialect's identifier quotes.
#[test]
fn fd90_12_quoting_matrix_drop_table_per_dialect() {
    let ansi_sql = super::sql_drop_table_query_in_schema(&AnsiSchemaConn, "events", Some("public"))
        .expect("ansi drop");
    assert_eq!(ansi_sql, "DROP TABLE IF EXISTS \"public\".\"events\"");

    let mysql_sql =
        super::sql_drop_table_query_in_schema(&MysqlBacktickConn, "events", Some("analytics"))
            .expect("mysql drop");
    assert_eq!(mysql_sql, "DROP TABLE IF EXISTS `analytics`.`events`");

    let mssql_sql = super::sql_drop_table_query_in_schema(&MssqlBracketConn, "events", Some("dbo"))
        .expect("mssql drop");
    assert_eq!(mssql_sql, "DROP TABLE IF EXISTS [dbo].[events]");
}
28261
/// Neither stub overrides `truncate_table`, so the trait's provided
/// implementation runs. It must send a `DELETE FROM` for the schema-qualified
/// table through `execute_batch`, quoted by the connection's
/// `quote_identifier`.
#[test]
fn fd90_12_quoting_matrix_truncate_uses_default_delete_with_per_dialect_quoting() {
    // Stub that records every batch statement and keeps the trait's provided
    // identifier quoting.
    #[derive(Default)]
    struct CapturingAnsi {
        captured: std::cell::RefCell<Vec<String>>,
    }
    impl super::SqlConnection for CapturingAnsi {
        fn supports_schemas(&self) -> bool {
            true
        }
        fn dtype_sql(&self, _dtype: DType) -> &'static str {
            "TEXT"
        }
        fn index_dtype_sql(&self, _index: &Index) -> &'static str {
            "TEXT"
        }
        fn table_exists(&self, _name: &str) -> Result<bool, IoError> {
            Ok(false)
        }
        fn insert_rows(&self, _sql: &str, _rows: &[Vec<Scalar>]) -> Result<(), IoError> {
            Ok(())
        }
        fn query(&self, _sql: &str, _params: &[Scalar]) -> Result<super::SqlQueryResult, IoError> {
            Ok(super::SqlQueryResult {
                columns: Vec::new(),
                rows: Vec::new(),
            })
        }
        fn execute_batch(&self, sql: &str) -> Result<(), IoError> {
            self.captured.borrow_mut().push(sql.to_owned());
            Ok(())
        }
    }

    let ansi = CapturingAnsi::default();
    super::SqlConnection::truncate_table(&ansi, "events", Some("public")).expect("ansi truncate");
    assert_eq!(
        ansi.captured.into_inner(),
        vec!["DELETE FROM \"public\".\"events\"".to_owned()]
    );

    // Same recording stub, but with backtick identifier quoting.
    #[derive(Default)]
    struct CapturingMysql {
        captured: std::cell::RefCell<Vec<String>>,
    }
    impl super::SqlConnection for CapturingMysql {
        fn supports_schemas(&self) -> bool {
            true
        }
        fn quote_identifier(&self, ident: &str) -> Result<String, IoError> {
            if ident.contains('\0') {
                Err(IoError::Sql("invalid SQL identifier: NUL byte".to_owned()))
            } else {
                // An embedded backtick is escaped by doubling it.
                let escaped = ident.replace('`', "``");
                Ok(format!("`{}`", escaped))
            }
        }
        fn dtype_sql(&self, _dtype: DType) -> &'static str {
            "TEXT"
        }
        fn index_dtype_sql(&self, _index: &Index) -> &'static str {
            "TEXT"
        }
        fn table_exists(&self, _name: &str) -> Result<bool, IoError> {
            Ok(false)
        }
        fn insert_rows(&self, _sql: &str, _rows: &[Vec<Scalar>]) -> Result<(), IoError> {
            Ok(())
        }
        fn query(&self, _sql: &str, _params: &[Scalar]) -> Result<super::SqlQueryResult, IoError> {
            Ok(super::SqlQueryResult {
                columns: Vec::new(),
                rows: Vec::new(),
            })
        }
        fn execute_batch(&self, sql: &str) -> Result<(), IoError> {
            self.captured.borrow_mut().push(sql.to_owned());
            Ok(())
        }
    }

    let mysql = CapturingMysql::default();
    super::SqlConnection::truncate_table(&mysql, "events", Some("analytics"))
        .expect("mysql truncate");
    assert_eq!(
        mysql.captured.into_inner(),
        vec!["DELETE FROM `analytics`.`events`".to_owned()]
    );
}
28350
/// Each dialect escapes its own closing quote character by doubling it and
/// leaves quote characters belonging to other dialects untouched.
#[test]
fn fd90_12_quoting_matrix_embedded_quote_chars_doubled_per_dialect() {
    use super::SqlConnection as _;

    let (ansi, mysql, mssql) = (AnsiSchemaConn, MysqlBacktickConn, MssqlBracketConn);

    // The dialect's own quote character is doubled.
    let ansi_own = ansi.quote_identifier("a\"b").expect("ansi");
    assert_eq!(ansi_own, "\"a\"\"b\"");
    let mysql_own = mysql.quote_identifier("a`b").expect("mysql");
    assert_eq!(mysql_own, "`a``b`");
    let mssql_own = mssql.quote_identifier("a]b").expect("mssql");
    assert_eq!(mssql_own, "[a]]b]");

    // Quote characters from another dialect pass through verbatim.
    let ansi_foreign = ansi
        .quote_identifier("a`b")
        .expect("ansi backtick passthrough");
    assert_eq!(ansi_foreign, "\"a`b\"");
    let mysql_foreign = mysql
        .quote_identifier("a\"b")
        .expect("mysql quote passthrough");
    assert_eq!(mysql_foreign, "`a\"b`");
    let mssql_foreign = mssql
        .quote_identifier("a\"b")
        .expect("mssql quote passthrough");
    assert_eq!(mssql_foreign, "[a\"b]");
}
28383
/// For each test connection, an identifier exactly at the connection's
/// `max_identifier_length()` is accepted and one character more is rejected.
/// The rejection message must mention the cap (63, 64, and 128 respectively).
#[test]
fn fd90_12_quoting_matrix_long_identifier_within_cap_succeeds_over_cap_rejected() {
    use super::SqlConnection as _;

    // ANSI connection: 63 characters pass, 64 fail.
    let ansi = AnsiSchemaConn;
    let ansi_at_cap = "a".repeat(63);
    let ansi_over_cap = "a".repeat(64);
    super::validate_sql_identifier_length(&ansi_at_cap, ansi.max_identifier_length(), "table")
        .expect("pg 63 ok");
    let ansi_err =
        super::validate_sql_identifier_length(&ansi_over_cap, ansi.max_identifier_length(), "table")
            .expect_err("pg 64 over cap");
    assert!(matches!(ansi_err, IoError::Sql(msg) if msg.contains("63") && msg.contains("table")));

    // MySQL connection: 64 characters pass, 65 fail.
    let mysql = MysqlBacktickConn;
    let mysql_at_cap = "b".repeat(64);
    let mysql_over_cap = "b".repeat(65);
    super::validate_sql_identifier_length(&mysql_at_cap, mysql.max_identifier_length(), "table")
        .expect("mysql 64 ok");
    let mysql_err = super::validate_sql_identifier_length(
        &mysql_over_cap,
        mysql.max_identifier_length(),
        "table",
    )
    .expect_err("mysql 65 over cap");
    assert!(matches!(mysql_err, IoError::Sql(msg) if msg.contains("64")));

    // MSSQL connection: 128 characters pass, 129 fail.
    let mssql = MssqlBracketConn;
    let mssql_at_cap = "c".repeat(128);
    let mssql_over_cap = "c".repeat(129);
    super::validate_sql_identifier_length(&mssql_at_cap, mssql.max_identifier_length(), "table")
        .expect("mssql 128 ok");
    let mssql_err = super::validate_sql_identifier_length(
        &mssql_over_cap,
        mssql.max_identifier_length(),
        "table",
    )
    .expect_err("mssql 129 over cap");
    assert!(matches!(mssql_err, IoError::Sql(msg) if msg.contains("128")));
}
28421
/// Every schema-aware query builder, plus the provided `truncate_table`, must
/// reject a 64-character table, schema, or column name on the ANSI test
/// connection with an identifier-length error naming the offending kind.
#[test]
fn fd90_12_query_builders_enforce_identifier_length_caps() {
    /// Asserts that `err` is an `IoError::Sql` whose message names `kind`,
    /// the 63 cap, and the "backend identifier limit" phrase.
    fn assert_length_error(err: IoError, kind: &str) {
        let is_length_error = match &err {
            IoError::Sql(msg) => {
                msg.contains(kind)
                    && msg.contains("63")
                    && msg.contains("backend identifier limit")
            }
            _ => false,
        };
        assert!(
            is_length_error,
            "expected SQL identifier-length error for {kind}, got {err:?}"
        );
    }

    let conn = AnsiSchemaConn;
    let over_cap = "a".repeat(64);
    let cols = vec![over_cap.clone()];
    let defs = vec!["id BIGINT".to_owned()];

    // SELECT builders: table, schema, and projected column positions.
    let select_table_err = super::sql_select_all_query_in_schema(&conn, &over_cap, None)
        .expect_err("SELECT * table over cap");
    assert_length_error(select_table_err, "table");

    let select_schema_err = super::sql_select_all_query_in_schema(&conn, "events", Some(&over_cap))
        .expect_err("SELECT * schema over cap");
    assert_length_error(select_schema_err, "schema");

    let select_column_err =
        super::sql_select_columns_query_in_schema(&conn, "events", None, &[over_cap.as_str()])
            .expect_err("SELECT column over cap");
    assert_length_error(select_column_err, "column");

    // DDL / DML builders.
    let create_err = super::sql_create_table_query_in_schema(&conn, &over_cap, None, &defs)
        .expect_err("CREATE table over cap");
    assert_length_error(create_err, "table");

    let insert_err = super::sql_insert_rows_query_in_schema(&conn, "events", None, &cols)
        .expect_err("INSERT column over cap");
    assert_length_error(insert_err, "column");

    let multi_insert_err =
        super::sql_multi_row_insert_query_in_schema(&conn, "events", None, &cols, 1)
            .expect_err("multi-row INSERT column over cap");
    assert_length_error(multi_insert_err, "column");

    let drop_err = super::sql_drop_table_query_in_schema(&conn, &over_cap, None)
        .expect_err("DROP table over cap");
    assert_length_error(drop_err, "table");

    let truncate_err = super::SqlConnection::truncate_table(&conn, &over_cap, None)
        .expect_err("TRUNCATE fallback table over cap");
    assert_length_error(truncate_err, "table");
}
28480
/// The table-name and column-name validators must reject names containing
/// punctuation, whitespace, quote characters, or SQL comment markers, as well
/// as the empty name, and report them with the matching "invalid … name"
/// message.
#[test]
fn fd90_12_quoting_matrix_special_characters_rejected_by_validator() {
    let bad: &[&str] = &[
        "my-col",
        "my.col",
        "my col",
        "my:col",
        "my'col",
        "my\"col",
        "my;col",
        "schema.table",
        "DROP--",
        "",
    ];
    for name in bad {
        // Match on the result instead of `expect_err(&format!(..))` so the
        // panic message is only formatted when a validator wrongly accepts
        // the name, and borrow the error so a failed assertion can print it.
        match super::validate_sql_table_name(name) {
            Ok(_) => panic!("must reject `{name}`"),
            Err(err) => assert!(
                matches!(&err, IoError::Sql(msg) if msg.contains("invalid table name")),
                "expected invalid-table-name error for {name:?}, got {err:?}"
            ),
        }
        match super::validate_sql_column_name(name) {
            Ok(_) => panic!("must reject col `{name}`"),
            Err(err) => assert!(
                matches!(&err, IoError::Sql(msg) if msg.contains("invalid column name")),
                "expected invalid-column-name error for {name:?}, got {err:?}"
            ),
        }
    }
}
28507
/// `quote_identifier` on every test dialect must refuse an identifier that
/// contains a NUL byte and say so in the error message.
#[test]
fn fd90_12_quoting_matrix_nul_byte_rejected_at_quote_identifier_layer() {
    use super::SqlConnection as _;

    let with_nul = "ab\0cd";

    let ansi_err = AnsiSchemaConn
        .quote_identifier(with_nul)
        .expect_err("ansi must reject NUL");
    assert!(matches!(ansi_err, IoError::Sql(msg) if msg.contains("NUL")));

    let mysql_err = MysqlBacktickConn
        .quote_identifier(with_nul)
        .expect_err("mysql must reject NUL");
    assert!(matches!(mysql_err, IoError::Sql(msg) if msg.contains("NUL")));

    let mssql_err = MssqlBracketConn
        .quote_identifier(with_nul)
        .expect_err("mssql must reject NUL");
    assert!(matches!(mssql_err, IoError::Sql(msg) if msg.contains("NUL")));
}
28532
/// Reading a table that has declared column types but no rows must still
/// yield `Int64` / `Utf8` / `Float64` columns for `INTEGER` / `TEXT` / `REAL`.
#[cfg(feature = "sql-sqlite")]
#[test]
fn read_sql_empty_typed_table_preserves_column_dtypes_ex8ec() {
    let conn = make_sql_test_conn();
    let ddl = "CREATE TABLE empty_typed_ex8ec (i INTEGER, t TEXT, r REAL);";
    super::SqlConnection::execute_batch(&conn, ddl).expect("create");

    let frame = read_sql(&conn, "SELECT * FROM empty_typed_ex8ec").expect("read empty");
    assert_eq!(frame.index().len(), 0, "empty table should yield zero rows");

    // (column name, expected dtype, panic message when the column is absent)
    let expectations = [
        ("i", crate::DType::Int64, "column i must exist"),
        ("t", crate::DType::Utf8, "column t must exist"),
        ("r", crate::DType::Float64, "column r must exist"),
    ];
    for (name, expected_dtype, missing_msg) in expectations {
        let column = frame.column(name).expect(missing_msg);
        assert_eq!(column.dtype(), expected_dtype);
    }
}
28554
/// Reading a table whose only row is all `NULL` must keep the dtypes implied
/// by the declared column types and surface every value as missing.
#[cfg(feature = "sql-sqlite")]
#[test]
fn read_sql_all_null_typed_table_preserves_column_dtypes_0qo9c() {
    let conn = make_sql_test_conn();
    let setup_sql = concat!(
        "CREATE TABLE all_null_typed_0qo9c (i INTEGER, t TEXT, r REAL);\n",
        " INSERT INTO all_null_typed_0qo9c (i, t, r) VALUES (NULL, NULL, NULL);",
    );
    super::SqlConnection::execute_batch(&conn, setup_sql).expect("create and insert");

    let frame = read_sql(&conn, "SELECT * FROM all_null_typed_0qo9c")
        .expect("read all-null typed table");
    assert_eq!(frame.index().len(), 1);

    // (column name, expected dtype, panic message when the column is absent)
    let expectations = [
        ("i", crate::DType::Int64, "column i must exist"),
        ("t", crate::DType::Utf8, "column t must exist"),
        ("r", crate::DType::Float64, "column r must exist"),
    ];
    for (name, expected_dtype, missing_msg) in expectations {
        let column = frame.column(name).expect(missing_msg);
        assert_eq!(column.dtype(), expected_dtype);
        assert!(column.values()[0].is_missing());
    }
}
28580}
28581
/// Differential tests that pin `fuse_scan_numeric_csv_field` /
/// `push_fused_numeric_csv_field` to `push_csv_default_numeric_field`.
#[cfg(test)]
mod fused_numeric_csv_field_tests {
    use super::{
        CsvTypedColumnValues, fuse_scan_numeric_csv_field, push_csv_default_numeric_field,
        push_fused_numeric_csv_field,
    };

    /// Returns the raw bit pattern of every float so that comparisons
    /// distinguish `-0.0` from `0.0` and treat equal NaN payloads as equal.
    fn float_bits(values: &[f64]) -> Vec<u64> {
        values.iter().map(|value| value.to_bits()).collect()
    }

    /// Asserts that two columns have the same variant and the same contents.
    /// Float columns are compared bit-for-bit. `token` only labels failures.
    fn assert_same_columns(base: &CsvTypedColumnValues, fused: &CsvTypedColumnValues, token: &str) {
        match (base, fused) {
            (CsvTypedColumnValues::Float64(lhs), CsvTypedColumnValues::Float64(rhs)) => {
                assert_eq!(
                    float_bits(lhs),
                    float_bits(rhs),
                    "Float64 bit mismatch for token {token:?}"
                );
            }
            (CsvTypedColumnValues::Int64(lhs), CsvTypedColumnValues::Int64(rhs)) => {
                assert_eq!(lhs, rhs, "Int64 mismatch for token {token:?}");
            }
            _ => panic!("column dtype diverged for token {token:?}"),
        }
    }

    /// Whenever the fused scanner admits a token, it must stop exactly at the
    /// end of the token and push the same value as the fallback parser into
    /// both an `Int64` and a `Float64` column. Tokens the scanner declines
    /// are skipped.
    #[test]
    fn fused_field_matches_fallback_parser() {
        let tokens = [
            "0",
            "-0",
            "+7",
            "007",
            "5",
            "-5",
            "123456789",
            "999999999999999999",
            "-999999999999999999",
            "9007199254740993",
            "1234567890123456789",
            "0.1",
            "-0.1",
            "+0.5",
            ".5",
            "-.5",
            "00.5",
            "5.",
            "12.34",
            "123456.7",
            "0.30000000000000004",
            "9007199254740993.0",
            "1e5",
            "1E5",
            "1.5e-3",
            "inf",
            "-inf",
            "nan",
            "NaN",
            "",
            " 5",
            "5 ",
            "true",
            "false",
            "TRUE",
            "1.2.3",
            "--5",
            "+-5",
            "1-2",
            "#N/A",
            "NULL",
            "abc",
        ];
        // Each token is scanned bare, before a comma, before a newline, and
        // before a further field.
        let suffixes = ["", ",", "\n", ",9\n"];

        for token in tokens {
            for suffix in suffixes {
                let data = [token, suffix].concat();
                let field = match fuse_scan_numeric_csv_field(data.as_bytes(), 0) {
                    Some(field) => field,
                    None => continue,
                };
                assert_eq!(
                    field.end,
                    token.len(),
                    "fused scanner must stop at the delimiter for token {token:?}"
                );

                // Push the token through both parsers into matching empty
                // columns and compare the results.
                let compare = |mut base: CsvTypedColumnValues,
                               mut fused: CsvTypedColumnValues,
                               label: &str| {
                    assert!(
                        push_csv_default_numeric_field(&mut base, token.as_bytes()),
                        "{label} rejected fused-admitted token {token:?}"
                    );
                    push_fused_numeric_csv_field(&mut fused, &field);
                    assert_same_columns(&base, &fused, token);
                };

                compare(
                    CsvTypedColumnValues::Int64(Vec::new()),
                    CsvTypedColumnValues::Int64(Vec::new()),
                    "fallback",
                );
                compare(
                    CsvTypedColumnValues::Float64(Vec::new()),
                    CsvTypedColumnValues::Float64(Vec::new()),
                    "fallback Float64",
                );
            }
        }
    }

    /// `-0` scans as integer zero, but its float value keeps the sign bit,
    /// both on the scanned field and after being pushed into a float column.
    #[test]
    fn fused_field_preserves_negative_zero_bits() {
        let negative_zero_bits = (-0.0f64).to_bits();

        let field = fuse_scan_numeric_csv_field(b"-0,", 0).expect("fused scanner must admit -0");
        assert_eq!(field.int_value, Some(0));
        assert_eq!(field.float_value.to_bits(), negative_zero_bits);

        let mut float_column = CsvTypedColumnValues::Float64(Vec::new());
        push_fused_numeric_csv_field(&mut float_column, &field);
        let CsvTypedColumnValues::Float64(values) = float_column else {
            panic!("expected Float64 column");
        };
        assert_eq!(values.len(), 1);
        assert_eq!(values[0].to_bits(), negative_zero_bits);
    }
}
28713
/// Tests comparing `merge_simple_numeric_csv_chunks` against a
/// straightforward single-threaded reference implementation.
#[cfg(test)]
mod merge_simple_numeric_csv_chunks_tests {
    use super::{CsvTypedColumnValues, merge_simple_numeric_csv_chunks};

    /// One parsed chunk: its typed columns plus the number of rows it holds.
    type ParsedChunk = (Vec<CsvTypedColumnValues>, i64);

    /// Reference merge. A column becomes `Float64` if any chunk holds it as
    /// `Float64`; integer chunks of such a column are widened with `as f64`.
    /// Returns `None` when a chunk's width differs from `header_count`, when
    /// the row total overflows `i64`, or when it does not fit in `usize`.
    fn reference_merge(
        parsed_chunks: Vec<ParsedChunk>,
        header_count: usize,
    ) -> Option<ParsedChunk> {
        // Pass 1: validate widths, total the rows, and decide final dtypes.
        let mut promote_to_float = vec![false; header_count];
        let mut total_rows: i64 = 0;
        for (columns, rows) in &parsed_chunks {
            if columns.len() != header_count {
                return None;
            }
            total_rows = total_rows.checked_add(*rows)?;
            for (promote, column) in promote_to_float.iter_mut().zip(columns) {
                *promote |= matches!(column, CsvTypedColumnValues::Float64(_));
            }
        }

        let capacity = usize::try_from(total_rows).ok()?;
        let mut merged: Vec<CsvTypedColumnValues> = Vec::with_capacity(header_count);
        for promote in promote_to_float {
            merged.push(if promote {
                CsvTypedColumnValues::Float64(Vec::with_capacity(capacity))
            } else {
                CsvTypedColumnValues::Int64(Vec::with_capacity(capacity))
            });
        }

        // Pass 2: append every chunk in order, widening where required.
        for (columns, _) in parsed_chunks {
            for (dst, src) in merged.iter_mut().zip(columns) {
                match (dst, src) {
                    (CsvTypedColumnValues::Int64(dst), CsvTypedColumnValues::Int64(src)) => {
                        dst.extend(src);
                    }
                    (CsvTypedColumnValues::Float64(dst), CsvTypedColumnValues::Float64(src)) => {
                        dst.extend(src);
                    }
                    (CsvTypedColumnValues::Float64(dst), CsvTypedColumnValues::Int64(src)) => {
                        dst.extend(src.into_iter().map(|value| value as f64));
                    }
                    // Pass 1 marks a column as float whenever any chunk is
                    // float, so an integer destination never meets float input.
                    (CsvTypedColumnValues::Int64(_), CsvTypedColumnValues::Float64(_)) => {
                        return None;
                    }
                }
            }
        }

        Some((merged, total_rows))
    }

    /// Builds `chunk_count` chunks of `rows_per_chunk` rows and `header_count`
    /// columns. Column `col` is `Int64` in chunks before
    /// `float_from_chunk_for_col(col)` and `Float64` from that chunk onward.
    /// Values are derived from the global row number and the column index.
    fn build_chunks(
        chunk_count: usize,
        rows_per_chunk: usize,
        header_count: usize,
        float_from_chunk_for_col: impl Fn(usize) -> usize,
    ) -> Vec<ParsedChunk> {
        let mut chunks = Vec::with_capacity(chunk_count);
        for chunk in 0..chunk_count {
            let mut columns = Vec::with_capacity(header_count);
            for col in 0..header_count {
                let column = if chunk >= float_from_chunk_for_col(col) {
                    CsvTypedColumnValues::Float64(
                        (0..rows_per_chunk)
                            .map(|row| {
                                (chunk * rows_per_chunk + row) as f64 * 0.5 + col as f64 * 1000.0
                            })
                            .collect(),
                    )
                } else {
                    CsvTypedColumnValues::Int64(
                        (0..rows_per_chunk)
                            .map(|row| (chunk * rows_per_chunk + row) as i64 + col as i64 * 1000)
                            .collect(),
                    )
                };
                columns.push(column);
            }
            chunks.push((columns, rows_per_chunk as i64));
        }
        chunks
    }

    /// Copies a column by hand, variant by variant.
    fn clone_column(column: &CsvTypedColumnValues) -> CsvTypedColumnValues {
        match column {
            CsvTypedColumnValues::Int64(values) => CsvTypedColumnValues::Int64(values.clone()),
            CsvTypedColumnValues::Float64(values) => CsvTypedColumnValues::Float64(values.clone()),
        }
    }

    /// Runs both the reference merge and `merge_simple_numeric_csv_chunks` on
    /// the same input. They must agree on `Some`/`None`, the row count, each
    /// column's variant, and each column's contents (floats bit-for-bit).
    fn assert_merge_matches_reference(chunks: Vec<ParsedChunk>, header_count: usize) {
        // Both merges consume their input, so the reference gets a deep copy.
        let reference_chunks: Vec<ParsedChunk> = chunks
            .iter()
            .map(|(columns, rows)| (columns.iter().map(clone_column).collect(), *rows))
            .collect();

        let expected = reference_merge(reference_chunks, header_count);
        let actual = merge_simple_numeric_csv_chunks(chunks, header_count);

        let ((expected_columns, expected_rows), (actual_columns, actual_rows)) =
            match (expected, actual) {
                (None, None) => return,
                (Some(expected), Some(actual)) => (expected, actual),
                (expected, actual) => panic!(
                    "merge outcome diverged: reference some={} actual some={}",
                    expected.is_some(),
                    actual.is_some()
                ),
            };

        assert_eq!(expected_rows, actual_rows);
        assert_eq!(expected_columns.len(), actual_columns.len());
        for (idx, pair) in expected_columns.iter().zip(&actual_columns).enumerate() {
            match pair {
                (CsvTypedColumnValues::Int64(lhs), CsvTypedColumnValues::Int64(rhs)) => {
                    assert_eq!(lhs, rhs, "Int64 column {idx} diverged");
                }
                (CsvTypedColumnValues::Float64(lhs), CsvTypedColumnValues::Float64(rhs)) => {
                    let lhs_bits: Vec<u64> = lhs.iter().map(|value| value.to_bits()).collect();
                    let rhs_bits: Vec<u64> = rhs.iter().map(|value| value.to_bits()).collect();
                    assert_eq!(lhs_bits, rhs_bits, "Float64 column {idx} bits diverged");
                }
                _ => panic!("column {idx} dtype diverged"),
            }
        }
    }

    /// Eight chunks of 1000 rows across 12 columns, mixing columns that stay
    /// integer, are float throughout, or switch to float at chunk 3 or 7.
    /// NOTE(review): presumably sized to take the merge's parallel path;
    /// confirm against the thresholds in `merge_simple_numeric_csv_chunks`.
    #[test]
    fn parallel_merge_matches_reference_with_mixed_promotion() {
        let float_from_chunk = |col: usize| match col % 4 {
            0 => usize::MAX,
            1 => 0,
            2 => 3,
            _ => 7,
        };
        let chunks = build_chunks(8, 1000, 12, float_from_chunk);
        assert_merge_matches_reference(chunks, 12);
    }

    /// Small input (three chunks of four rows) where even columns stay
    /// integer and odd columns switch to float at chunk 1.
    /// NOTE(review): presumably small enough for the sequential path;
    /// confirm against the merge implementation.
    #[test]
    fn sequential_merge_matches_reference() {
        let float_from_chunk = |col: usize| if col % 2 == 0 { usize::MAX } else { 1 };
        let chunks = build_chunks(3, 4, 5, float_from_chunk);
        assert_merge_matches_reference(chunks, 5);
    }

    /// Sweeps several column counts with five chunks of 2000 rows, where each
    /// column switches to float at chunk `col % 6`.
    #[test]
    fn parallel_merge_handles_narrow_and_ragged_widths() {
        for header_count in [2, 3, 7, 9, 17] {
            let chunks = build_chunks(5, 2000, header_count, |col| col % 6);
            assert_merge_matches_reference(chunks, header_count);
        }
    }

    /// A chunk with fewer columns than `header_count` makes the merge return
    /// `None`.
    #[test]
    fn merge_rejects_mismatched_chunk_width() {
        let mut chunks = build_chunks(2, 10, 4, |_| usize::MAX);
        // Drop the last column of the second chunk to make it too narrow.
        chunks[1].0.pop();
        assert!(merge_simple_numeric_csv_chunks(chunks, 4).is_none());
    }
}