fp_io/
lib.rs

1#![forbid(unsafe_code)]
2#![warn(rustdoc::broken_intra_doc_links)]
3
4//! IO layer for **frankenpandas**: round-trips between `DataFrame` and the
5//! fifteen supported on-disk / wire formats — CSV, JSON, JSONL, Parquet, ORC,
6//! HDF5, Excel (XLSX), Feather (Arrow IPC v2), SQL, Markdown, LaTeX, HTML,
7//! XML, Pickle, and Stata.
8//!
9//! ## Format readers / writers
10//!
11//! - **CSV**: [`read_csv`], [`read_csv_with_options`], [`write_csv`],
12//!   [`write_csv_string`]
13//! - **JSON / JSONL**: [`read_json`], [`read_jsonl`], [`write_json`],
14//!   [`write_jsonl`]
15//! - **Parquet**: [`read_parquet`], [`write_parquet`]
16//! - **ORC**: [`read_orc`], [`write_orc`]
17//! - **HDF5**: [`read_hdf`], [`write_hdf`] for the keyed DataFrame snapshot
18//!   surface.
19//! - **Excel**: [`read_excel`], [`write_excel`]
20//! - **Feather / Arrow IPC**: [`read_feather`], [`write_feather`],
21//!   [`read_ipc_stream_bytes`], [`write_ipc_stream_bytes`]
22//! - **SQL**: [`read_sql`], [`read_sql_table`], [`write_sql`],
23//!   [`write_sql_with_options`], plus the chunked variants
24//!   ([`read_sql_chunks`], [`SqlChunkIterator`]).
25//! - **Markdown / LaTeX / HTML / XML**: [`write_markdown_string`],
26//!   [`write_latex_string`], [`write_html_string`], [`read_html_str`],
27//!   [`write_xml_string`], [`read_xml_str`].
28//! - **Pickle**: [`write_pickle_bytes`], [`read_pickle_bytes`] for the
29//!   fail-closed FrankenPandas DataFrame snapshot envelope.
30//! - **Stata**: [`write_stata_bytes`], [`read_stata_bytes`] for the bounded
31//!   DTA V118 DataFrame round-trip surface.
32//!
33//! Each format has a per-call options struct ([`CsvReadOptions`],
34//! [`ExcelReadOptions`], [`SqlReadOptions`], [`SqlWriteOptions`], ...) so
35//! pandas-shaped keyword arguments thread cleanly through the Rust API.
36//! The [`DataFrameIoExt`] extension trait adds `df.to_csv(path)` /
37//! `df.to_parquet(path)` / etc. methods on `DataFrame` for ergonomic
38//! method-chain use.
39//!
40//! ## SQL backend abstraction
41//!
42//! SQL IO is built around the [`SqlConnection`] trait — a backend-neutral
43//! contract that mirrors the supported subset of pandas /
44//! `SQLAlchemy.Inspector`. Concrete backends (today: rusqlite via the
45//! `sql-sqlite` feature) implement the trait and inherit:
46//!
47//! - **Mutation primitives**: [`SqlConnection::query`],
48//!   [`SqlConnection::execute_batch`], [`SqlConnection::insert_rows`],
49//!   [`SqlConnection::truncate_table`], [`SqlConnection::with_transaction`].
50//! - **Capability probes**: [`SqlConnection::dialect_name`],
51//!   [`SqlConnection::server_version`], [`SqlConnection::max_param_count`],
52//!   [`SqlConnection::max_identifier_length`],
53//!   [`SqlConnection::supports_returning`],
54//!   [`SqlConnection::supports_schemas`].
55//! - **Identifier / parameter shape hooks**:
56//!   [`SqlConnection::quote_identifier`],
57//!   [`SqlConnection::parameter_marker`].
58//! - **Introspection** (matching `SQLAlchemy.Inspector` shape):
59//!   [`SqlConnection::list_tables`], [`SqlConnection::list_views`],
60//!   [`SqlConnection::list_schemas`], [`SqlConnection::table_schema`],
61//!   [`SqlConnection::list_indexes`], [`SqlConnection::list_foreign_keys`],
62//!   [`SqlConnection::list_unique_constraints`],
63//!   [`SqlConnection::primary_key_columns`],
64//!   [`SqlConnection::table_comment`].
65//!
66//! The [`SqlInspector`] facade wraps a `&C: SqlConnection` and exposes the
67//! whole introspection API as methods on a single bundle:
68//!
69//! ```ignore
70//! let inspector = SqlInspector::new(&conn);
71//! let bundle = inspector
72//!     .reflect_table("users", None)?
73//!     .expect("table exists");
74//! for col in &bundle.columns {
75//!     println!("{}: {:?}", col.name, col.declared_type);
76//! }
77//! ```
78//!
79//! [`SqlReflectedTable`] is the bundled metadata returned by
80//! [`SqlInspector::reflect_table`] / [`SqlInspector::reflect_all_tables`] /
81//! [`SqlInspector::reflect_all_views`] — columns, primary key, indexes,
82//! foreign keys, unique constraints, and table-level comment, with
83//! per-column lookup helpers.
84//!
85//! ## Cargo features
86//!
87//! - `sql-sqlite` (**default**): bind [`SqlConnection`] for
88//!   `rusqlite::Connection`.
89//! - `sql-postgresql`, `sql-mysql`: placeholder feature flags for the fd90
90//!   Phase 2 backend integrations (no concrete bindings yet).
91//!
92//! Use `default-features = false` to drop the rusqlite dep when only the
93//! non-SQL formats are needed.
94
95use std::{
96    borrow::Cow,
97    collections::{BTreeMap, BTreeSet, HashSet, VecDeque},
98    io::Cursor,
99    path::Path,
100    sync::{Arc, Mutex, OnceLock},
101};
102
103use arrow::{
104    array::{
105        Array, BooleanArray, BooleanBuilder, Date32Array, Date64Array, Float64Array,
106        Float64Builder, Int64Array, Int64Builder, RecordBatch, StringArray, StringBuilder,
107        TimestampMicrosecondArray, TimestampMillisecondArray, TimestampNanosecondArray,
108        TimestampSecondArray,
109    },
110    datatypes::{DataType as ArrowDataType, Field, Schema, TimeUnit},
111};
112use csv::{ReaderBuilder, StringRecord, WriterBuilder};
113use dta::stata::{
114    dta::{
115        byte_order::ByteOrder, dta_reader::DtaReader, dta_writer::DtaWriter, header::Header,
116        release::Release, schema::Schema as StataSchema, value::Value as StataValue,
117        variable::Variable, variable_type::VariableType,
118    },
119    missing_value::MissingValue,
120    stata_double::StataDouble,
121    stata_long::StataLong,
122};
123use fp_columnar::{Column, ColumnError};
124use fp_frame::{DataFrame, FrameError, Series, ToDatetimeOptions, to_datetime_with_options};
125use fp_index::{Index, IndexError, IndexLabel, format_datetime_ns};
126use fp_types::{DType, NullKind, Scalar, Timedelta, Timestamp, cast_scalar_owned};
127#[cfg(feature = "hdf5")]
128use hdf5::File as Hdf5File;
129use orc_rust::{
130    ArrowReaderBuilder as OrcArrowReaderBuilder, ArrowWriterBuilder as OrcArrowWriterBuilder,
131};
132use parquet::arrow::{ArrowWriter, arrow_reader::ParquetRecordBatchReaderBuilder};
133use quick_xml::{Reader as XmlReader, XmlVersion, events::Event};
134use scraper::{ElementRef, Html, Selector};
135use thiserror::Error;
136
/// Error type returned by the readers and writers in this crate.
///
/// Variants carrying a `String` hold a human-readable message for one format
/// or subsystem. Variants marked `#[from]` wrap an underlying error and, via
/// `#[error(transparent)]`, forward its `Display` output unchanged. The enum
/// is `#[non_exhaustive]`, so downstream `match`es need a wildcard arm.
#[derive(Debug, Error)]
#[non_exhaustive]
pub enum IoError {
    /// The CSV input has no header row.
    #[error("csv input has no headers")]
    MissingHeaders,
    /// The named CSV index column is not present in the headers.
    #[error("csv index column '{0}' not found in headers")]
    MissingIndexColumn(String),
    /// A column name occurs more than once.
    #[error("duplicate column name '{0}'")]
    DuplicateColumnName(String),
    /// `usecols` named columns that do not exist; the payload lists them.
    #[error("usecols contains missing columns: {0:?}")]
    MissingUsecols(Vec<String>),
    /// `parse_dates` named columns that do not exist; the payload lists them.
    #[error("parse_dates contains missing columns: {0:?}")]
    MissingParseDateColumns(Vec<String>),
    /// JSON-format failure described by the message.
    #[error("json format error: {0}")]
    JsonFormat(String),
    /// Parquet failure described by the message.
    #[error("parquet error: {0}")]
    Parquet(String),
    /// ORC failure described by the message.
    #[error("orc error: {0}")]
    Orc(String),
    /// HDF5 failure described by the message.
    #[error("hdf5 error: {0}")]
    Hdf5(String),
    /// Excel failure described by the message.
    #[error("excel error: {0}")]
    Excel(String),
    /// HTML failure described by the message.
    #[error("html error: {0}")]
    Html(String),
    /// XML failure described by the message.
    #[error("xml error: {0}")]
    Xml(String),
    /// Pickle failure described by the message.
    #[error("pickle error: {0}")]
    Pickle(String),
    /// Stata failure described by the message.
    #[error("stata error: {0}")]
    Stata(String),
    /// Fixed-width (FWF) failure described by the message, e.g. colspecs that
    /// cannot be inferred or that conflict with `widths`.
    #[error("fwf error: {0}")]
    Fwf(String),
    /// "Deferred reader" failure described by the message.
    /// NOTE(review): the producers of this variant are outside this excerpt —
    /// confirm what "deferred" covers.
    #[error("deferred reader: {0}")]
    Deferred(String),
    /// Arrow IPC failure described by the message.
    #[error("arrow ipc error: {0}")]
    Arrow(String),
    /// SQL failure described by the message.
    #[error("sql error: {0}")]
    Sql(String),
    /// Wrapped [`csv::Error`].
    #[error(transparent)]
    Csv(#[from] csv::Error),
    /// Wrapped [`serde_json::Error`].
    #[error(transparent)]
    Json(#[from] serde_json::Error),
    /// Wrapped [`std::io::Error`].
    #[error(transparent)]
    Io(#[from] std::io::Error),
    /// Wrapped [`std::string::FromUtf8Error`].
    #[error(transparent)]
    Utf8(#[from] std::string::FromUtf8Error),
    /// Wrapped [`ColumnError`] from `fp_columnar`.
    #[error(transparent)]
    Column(#[from] ColumnError),
    /// Wrapped [`FrameError`] from `fp_frame`.
    #[error(transparent)]
    Frame(#[from] FrameError),
    /// Wrapped [`IndexError`] from `fp_index`.
    #[error(transparent)]
    Index(#[from] IndexError),
}
191
/// JSON layout selector.
///
/// NOTE(review): the variant names line up with the pandas `orient` values
/// (`records`, `columns`, `index`, `split`, `values`); the exact layout each
/// one produces is defined by the JSON reader/writer, which is outside this
/// excerpt — confirm there.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum JsonOrient {
    Records,
    Columns,
    Index,
    Split,
    Values,
}
200
/// Policy for CSV rows with more fields than the header width; the value type
/// of [`CsvReadOptions::on_bad_lines`], whose default is `Error`.
///
/// NOTE(review): the names mirror the pandas `on_bad_lines` modes
/// `error` / `warn` / `skip`; the reader that acts on them is outside this
/// excerpt — confirm the exact per-variant behavior there.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum CsvOnBadLines {
    Error,
    Warn,
    Skip,
}
207
/// Per-call options for the CSV reader ([`read_csv_with_options`]).
///
/// Most fields model the pandas `read_csv` keyword argument of the same name.
/// [`Default`] yields a comma-delimited, header-bearing, double-quoted
/// configuration with default NA handling and no row or column filtering.
#[derive(Debug, Clone)]
pub struct CsvReadOptions {
    /// Field delimiter byte. Default: `,`.
    pub delimiter: u8,
    /// Whether the input carries a header row. Default: `true`.
    pub has_headers: bool,
    /// Additional NA values to recognize beyond the pandas defaults.
    pub na_values: Vec<String>,
    /// Whether to include the default NaN values when parsing data.
    /// If `na_values` are specified and `keep_default_na` is false, only the
    /// specified `na_values` will be treated as NA.
    /// Matches pandas `keep_default_na` parameter. Default: true.
    pub keep_default_na: bool,
    /// Detect missing value markers (empty strings and the value of na_values).
    /// In data without any NAs, passing `na_filter=false` can improve performance.
    /// Matches pandas `na_filter` parameter. Default: true.
    pub na_filter: bool,
    /// Name of the column to use as the index. Default: `None`.
    /// NOTE(review): presumably a name absent from the headers surfaces as
    /// `IoError::MissingIndexColumn` — confirm against the reader.
    pub index_col: Option<String>,
    /// Read only these columns (by name). `None` means read all.
    /// Matches pandas `usecols` parameter.
    pub usecols: Option<Vec<String>>,
    /// Maximum number of data rows to read. `None` means read all.
    /// Matches pandas `nrows` parameter.
    pub nrows: Option<usize>,
    /// Number of initial lines to skip at the start of the file (including
    /// the header line when `has_headers` is true).
    /// Matches pandas `skiprows` parameter (when given as int).
    pub skiprows: usize,
    /// Force specific dtypes for columns. Map of column name -> DType.
    /// Matches pandas `dtype` parameter.
    pub dtype: Option<std::collections::HashMap<String, DType>>,
    /// Column names to coerce via pandas-style parse_dates handling.
    /// Currently supports explicit column-name selection.
    pub parse_dates: Option<Vec<String>>,
    /// Column groups to combine and coerce via pandas-style parse_dates handling.
    /// Each group replaces its source columns with a new `<a>_<b>_...` datetime column.
    pub parse_date_combinations: Option<Vec<Vec<String>>>,
    /// Named column groups to combine and parse as datetime, matching
    /// `pd.read_csv(parse_dates={'new_name': ['year', 'month', 'day']})`.
    /// Each `(new_name, [source_cols])` entry replaces its source columns
    /// with a single combined datetime column using the caller-supplied
    /// name instead of the default `<a>_<b>_...` joined form.
    pub parse_date_combinations_named: Option<Vec<(String, Vec<String>)>>,
    /// Character whose lines are treated as comments and skipped entirely.
    /// Must be a single byte (ASCII); multi-byte characters are rejected.
    /// Matches pandas `comment` parameter. Default: `None`.
    pub comment: Option<u8>,
    /// Additional string values to coerce to `true` during CSV parsing.
    /// Matches pandas `true_values` parameter.
    pub true_values: Vec<String>,
    /// Additional string values to coerce to `false` during CSV parsing.
    /// Matches pandas `false_values` parameter.
    pub false_values: Vec<String>,
    /// Character to recognize as the decimal separator when parsing floats.
    /// Matches pandas `decimal` parameter. Default: `.`.
    pub decimal: u8,
    /// How to handle rows with more fields than the header width.
    /// Matches pandas `on_bad_lines` parameter for the supported
    /// `error`/`warn`/`skip` modes.
    pub on_bad_lines: CsvOnBadLines,
    /// Thousands separator stripped from numeric fields before parsing.
    /// Matches pandas `thousands` parameter. Must differ from `decimal`
    /// (otherwise the option is silently ignored, matching pandas).
    pub thousands: Option<u8>,
    /// Number of trailing data rows to drop (after the header is
    /// consumed). Matches pandas `skipfooter` parameter. Default: `0`.
    pub skipfooter: usize,
    /// Character used to quote fields that contain the delimiter, a
    /// newline, or the quote character itself. Defaults to `"` (ASCII
    /// double-quote). Matches pandas `quotechar` parameter.
    pub quotechar: u8,
    /// Character used to escape the quote character inside a quoted
    /// field when `doublequote` is false. `None` disables backslash-
    /// style escaping entirely. Matches pandas `escapechar` parameter.
    pub escapechar: Option<u8>,
    /// When true (the default), a doubled quote character inside a
    /// quoted field is interpreted as a single literal quote. When
    /// false, `escapechar` must be used to quote the quote character.
    /// Matches pandas `doublequote` parameter.
    pub doublequote: bool,
    /// Custom single-byte line terminator. When set, the reader treats
    /// only that byte as a record separator (instead of CRLF/LF).
    /// Matches pandas `lineterminator` (C-engine only). `None` keeps
    /// the default CRLF/LF handling.
    pub lineterminator: Option<u8>,
}
292
293impl Default for CsvReadOptions {
294    fn default() -> Self {
295        Self {
296            delimiter: b',',
297            has_headers: true,
298            na_values: Vec::new(),
299            keep_default_na: true,
300            na_filter: true,
301            index_col: None,
302            usecols: None,
303            nrows: None,
304            skiprows: 0,
305            dtype: None,
306            parse_dates: None,
307            parse_date_combinations: None,
308            parse_date_combinations_named: None,
309            comment: None,
310            true_values: Vec::new(),
311            false_values: Vec::new(),
312            decimal: b'.',
313            on_bad_lines: CsvOnBadLines::Error,
314            thousands: None,
315            quotechar: b'"',
316            escapechar: None,
317            doublequote: true,
318            skipfooter: 0,
319            lineterminator: None,
320        }
321    }
322}
323
324fn csv_read_options_match_default_shape(options: &CsvReadOptions, na_filter: bool) -> bool {
325    options.delimiter == b','
326        && options.has_headers
327        && options.na_values.is_empty()
328        && options.keep_default_na
329        && options.na_filter == na_filter
330        && options.index_col.is_none()
331        && options.usecols.is_none()
332        && options.nrows.is_none()
333        && options.skiprows == 0
334        && options.dtype.is_none()
335        && options.parse_dates.is_none()
336        && options.parse_date_combinations.is_none()
337        && options.parse_date_combinations_named.is_none()
338        && options.comment.is_none()
339        && options.true_values.is_empty()
340        && options.false_values.is_empty()
341        && options.decimal == b'.'
342        && options.on_bad_lines == CsvOnBadLines::Error
343        && options.thousands.is_none()
344        && options.skipfooter == 0
345        && options.quotechar == b'"'
346        && options.escapechar.is_none()
347        && options.doublequote
348        && options.lineterminator.is_none()
349}
350
351fn csv_read_options_match_default_fast_path(options: &CsvReadOptions) -> bool {
352    csv_read_options_match_default_shape(options, true)
353}
354
355fn csv_read_options_match_no_na_numeric_fast_path(options: &CsvReadOptions) -> bool {
356    csv_read_options_match_default_shape(options, false)
357}
358
/// Options for [`read_fwf_str`] and [`read_fwf`].
///
/// Callers can supply either `colspecs` (explicit `(start, end)`
/// character ranges, end-exclusive, matching pandas) or `widths`
/// (per-column character widths that get translated to cumulative
/// colspecs). When both are omitted, `read_fwf` infers colspecs from
/// non-whitespace runs across the non-skipped input lines.
///
/// Every field other than `colspecs` and `widths` is copied into the
/// [`CsvReadOptions`] field of the same name once the fixed-width text has
/// been re-encoded as CSV, so its meaning is the one documented there.
#[derive(Debug, Clone)]
pub struct FwfReadOptions {
    /// Explicit `(start, end)` column ranges in characters. End is
    /// exclusive, matching pandas. Mutually exclusive with `widths`.
    pub colspecs: Option<Vec<(usize, usize)>>,
    /// Per-column character widths. Translated to colspecs by cumulative
    /// sum. Mutually exclusive with `colspecs`.
    pub widths: Option<Vec<usize>>,
    /// Forwarded to [`CsvReadOptions::has_headers`]. Default: `true`.
    pub has_headers: bool,
    /// Forwarded to [`CsvReadOptions::na_values`]. Default: empty.
    pub na_values: Vec<String>,
    /// Forwarded to [`CsvReadOptions::keep_default_na`]. Default: `true`.
    pub keep_default_na: bool,
    /// Forwarded to [`CsvReadOptions::na_filter`]. Default: `true`.
    pub na_filter: bool,
    /// Forwarded to [`CsvReadOptions::index_col`]. Default: `None`.
    pub index_col: Option<String>,
    /// Forwarded to [`CsvReadOptions::usecols`]. Default: `None`.
    pub usecols: Option<Vec<String>>,
    /// Forwarded to [`CsvReadOptions::nrows`]. Default: `None`.
    pub nrows: Option<usize>,
    /// Forwarded to [`CsvReadOptions::skiprows`]; also the number of leading
    /// lines ignored when colspecs are inferred. Default: `0`.
    pub skiprows: usize,
    /// Forwarded to [`CsvReadOptions::dtype`]. Default: `None`.
    pub dtype: Option<std::collections::HashMap<String, DType>>,
    /// Forwarded to [`CsvReadOptions::parse_dates`]. Default: `None`.
    pub parse_dates: Option<Vec<String>>,
    /// Forwarded to [`CsvReadOptions::true_values`]. Default: empty.
    pub true_values: Vec<String>,
    /// Forwarded to [`CsvReadOptions::false_values`]. Default: empty.
    pub false_values: Vec<String>,
    /// Forwarded to [`CsvReadOptions::decimal`]. Default: `.`.
    pub decimal: u8,
    /// Forwarded to [`CsvReadOptions::thousands`]. Default: `None`.
    pub thousands: Option<u8>,
    /// Forwarded to [`CsvReadOptions::skipfooter`]; also the number of
    /// trailing lines ignored when colspecs are inferred. Default: `0`.
    pub skipfooter: usize,
}
390
391impl Default for FwfReadOptions {
392    fn default() -> Self {
393        Self {
394            colspecs: None,
395            widths: None,
396            has_headers: true,
397            na_values: Vec::new(),
398            keep_default_na: true,
399            na_filter: true,
400            index_col: None,
401            usecols: None,
402            nrows: None,
403            skiprows: 0,
404            dtype: None,
405            parse_dates: None,
406            true_values: Vec::new(),
407            false_values: Vec::new(),
408            decimal: b'.',
409            thousands: None,
410            skipfooter: 0,
411        }
412    }
413}
414
415fn infer_fwf_colspecs(
416    input: &str,
417    options: &FwfReadOptions,
418) -> Result<Vec<(usize, usize)>, IoError> {
419    let mut candidate_lines: Vec<&str> = input.lines().skip(options.skiprows).collect();
420    if options.skipfooter > 0 {
421        let retained = candidate_lines.len().saturating_sub(options.skipfooter);
422        candidate_lines.truncate(retained);
423    }
424
425    let candidate_lines: Vec<&str> = candidate_lines
426        .into_iter()
427        .filter(|line| !line.trim().is_empty())
428        .collect();
429    if candidate_lines.is_empty() {
430        return Err(IoError::Fwf(
431            "cannot infer fixed-width colspecs from empty input".to_owned(),
432        ));
433    }
434
435    let max_width = candidate_lines
436        .iter()
437        .map(|line| line.chars().count())
438        .max()
439        .unwrap_or(0);
440    let mut occupied = vec![false; max_width];
441    for line in candidate_lines {
442        for (idx, ch) in line.chars().enumerate() {
443            if !ch.is_whitespace()
444                && let Some(slot) = occupied.get_mut(idx)
445            {
446                *slot = true;
447            }
448        }
449    }
450
451    let mut specs = Vec::new();
452    let mut idx = 0usize;
453    while idx < occupied.len() {
454        while idx < occupied.len() && !occupied.get(idx).copied().unwrap_or(false) {
455            idx += 1;
456        }
457        if idx == occupied.len() {
458            break;
459        }
460        let start = idx;
461        while idx < occupied.len() && occupied.get(idx).copied().unwrap_or(false) {
462            idx += 1;
463        }
464        specs.push((start, idx));
465    }
466
467    if specs.is_empty() {
468        return Err(IoError::Fwf(
469            "cannot infer fixed-width colspecs from whitespace-only input".to_owned(),
470        ));
471    }
472    Ok(specs)
473}
474
475fn resolve_fwf_colspecs(
476    input: &str,
477    options: &FwfReadOptions,
478) -> Result<Vec<(usize, usize)>, IoError> {
479    match (&options.colspecs, &options.widths) {
480        (Some(_), Some(_)) => Err(IoError::Fwf(
481            "You must specify only one of 'widths' and 'colspecs'".to_owned(),
482        )),
483        (Some(specs), None) => {
484            for &(start, end) in specs {
485                if start > end {
486                    return Err(IoError::Fwf(format!(
487                        "colspecs entry ({start}, {end}) is inverted"
488                    )));
489                }
490            }
491            Ok(specs.clone())
492        }
493        (None, Some(widths)) => {
494            let mut specs = Vec::with_capacity(widths.len());
495            let mut cursor = 0usize;
496            for &w in widths {
497                let next = cursor.checked_add(w).ok_or_else(|| {
498                    IoError::Fwf("widths overflow when computing colspecs".to_owned())
499                })?;
500                specs.push((cursor, next));
501                cursor = next;
502            }
503            Ok(specs)
504        }
505        (None, None) => infer_fwf_colspecs(input, options),
506    }
507}
508
/// Re-encodes fixed-width `input` as comma-separated text.
///
/// Lines are split on `\n` (a trailing `\r` is dropped). For every line, each
/// `(start, end)` colspec selects the characters `start..end` — clamped to the
/// line length, and empty when `start` lies at or past the end of the line.
/// The selection is whitespace-trimmed and written as a `"`-quoted field in
/// which embedded `"` characters are doubled. Fields are joined with `,` and
/// every line ends with `\n`.
fn fwf_lines_to_csv(input: &str, colspecs: &[(usize, usize)]) -> String {
    let mut csv = String::new();
    // Reused per line: colspecs are character indices, so each line is
    // decoded into `char`s once and sliced by position.
    let mut chars: Vec<char> = Vec::new();

    for raw_line in input.split_terminator('\n') {
        let content = raw_line.strip_suffix('\r').unwrap_or(raw_line);
        chars.clear();
        chars.extend(content.chars());
        let width = chars.len();

        for (position, &(start, end)) in colspecs.iter().enumerate() {
            if position > 0 {
                csv.push(',');
            }
            let cell: String = if start < width {
                chars[start..end.min(width)].iter().collect()
            } else {
                String::new()
            };

            csv.push('"');
            // Splitting on `"` and re-joining with `""` doubles every quote.
            for (piece_no, piece) in cell.trim().split('"').enumerate() {
                if piece_no > 0 {
                    csv.push_str("\"\"");
                }
                csv.push_str(piece);
            }
            csv.push('"');
        }
        csv.push('\n');
    }
    csv
}
540
541fn fwf_csv_options(options: &FwfReadOptions) -> CsvReadOptions {
542    CsvReadOptions {
543        delimiter: b',',
544        has_headers: options.has_headers,
545        na_values: options.na_values.clone(),
546        keep_default_na: options.keep_default_na,
547        na_filter: options.na_filter,
548        index_col: options.index_col.clone(),
549        usecols: options.usecols.clone(),
550        nrows: options.nrows,
551        skiprows: options.skiprows,
552        dtype: options.dtype.clone(),
553        parse_dates: options.parse_dates.clone(),
554        parse_date_combinations: None,
555        parse_date_combinations_named: None,
556        comment: None,
557        true_values: options.true_values.clone(),
558        false_values: options.false_values.clone(),
559        decimal: options.decimal,
560        on_bad_lines: CsvOnBadLines::Error,
561        thousands: options.thousands,
562        quotechar: b'"',
563        escapechar: None,
564        doublequote: true,
565        skipfooter: options.skipfooter,
566        lineterminator: None,
567    }
568}
569
570/// Parse a fixed-width string, matching `pd.read_fwf(io.StringIO(s), ...)`.
571///
572/// Tokens are sliced by character index, then trimmed of leading and
573/// trailing whitespace before being threaded through the standard CSV
574/// scalar-coercion path. When `colspecs` and `widths` are omitted, the
575/// ranges are inferred from non-whitespace runs across the input.
576pub fn read_fwf_str(input: &str, options: &FwfReadOptions) -> Result<DataFrame, IoError> {
577    let colspecs = resolve_fwf_colspecs(input, options)?;
578    let csv_input = fwf_lines_to_csv(input, &colspecs);
579    let csv_options = fwf_csv_options(options);
580    read_csv_with_options(&csv_input, &csv_options)
581}
582
583/// Build a CSV column from per-cell parsed scalars, but preserve original text
584/// when the column infers to object (`Utf8`) dtype.
585///
586/// pandas does column-level inference: a column becomes a single dtype, and when
587/// it falls back to object every non-NA cell keeps its VERBATIM source text. FP
588/// parses per cell (`"true"` → `Bool`, `"01"` → `Int64(1)`), so without this the
589/// object-fallback column would re-stringify those scalars canonically — writing
590/// `"True"`/`"1"` where pandas keeps `"true"`/`"01"`. When the inferred dtype is
591/// `Utf8`, rebuild the column straight from `raw` (NA cells stay null) so the
592/// original literals survive. Native (non-object) columns are unaffected.
593///
594/// Missingness is read back from the parsed column's own values rather than
595/// re-detecting NA tokens, so this works for both the default reader and the
596/// options reader (which honors a configurable na/keep_default_na token set):
597/// a cell is null in the rebuilt column iff it parsed to a missing scalar.
598/// `raw` must be positionally aligned with `values` (same length).
599fn build_csv_object_aware_column(values: Vec<Scalar>, raw: &[String]) -> Result<Column, IoError> {
600    let column = Column::from_values(values)?;
601    if column.dtype() == DType::Utf8 && column.values().len() == raw.len() {
602        let rebuilt: Vec<Scalar> = column
603            .values()
604            .iter()
605            .zip(raw)
606            .map(|(parsed, field)| {
607                if parsed.is_missing() {
608                    Scalar::Null(NullKind::Null)
609                } else {
610                    Scalar::Utf8(field.clone())
611                }
612            })
613            .collect();
614        return Ok(Column::new(DType::Utf8, rebuilt)?);
615    }
616    Ok(column)
617}
618
/// Typed accumulator for one numeric CSV column.
///
/// The push helpers that take this type keep an `Int64` buffer while every
/// field is an integer, and replace it with a `Float64` buffer (existing
/// values converted with `as f64`) once a non-integer number is pushed.
enum CsvTypedColumnValues {
    /// Every value pushed so far parsed as an `i64`.
    Int64(Vec<i64>),
    /// Values stored as `f64`.
    Float64(Vec<f64>),
}
623
// 1 MiB (`1 << 20` bytes).
// NOTE(review): presumably the minimum input size at which the simple numeric
// CSV path parses in parallel — the consumer is outside this excerpt; confirm.
const SIMPLE_NUMERIC_CSV_PARALLEL_MIN_BYTES: usize = 1 << 20;
// NOTE(review): presumably the upper bound on worker threads for that parallel
// path — the consumer is outside this excerpt; confirm.
const SIMPLE_NUMERIC_CSV_PARALLEL_MAX_WORKERS: usize = 8;
626
/// Returns `field` with leading and trailing ASCII whitespace removed.
///
/// "Whitespace" is the [`u8::is_ascii_whitespace`] set: space, tab, line
/// feed, form feed, and carriage return (vertical tab is *not* included).
/// Delegates to the standard library's `<[u8]>::trim_ascii`, which uses that
/// same definition, rather than a hand-rolled loop.
fn trim_ascii_field(field: &[u8]) -> &[u8] {
    field.trim_ascii()
}
644
/// Returns `true` when `field` is, byte for byte, one of the default
/// missing-value tokens listed below (the empty field included).
///
/// The comparison is exact: no trimming and no case folding, so `NA` matches
/// while `na` and ` NA` do not.
fn is_pandas_default_na_bytes(field: &[u8]) -> bool {
    matches!(
        field,
        b"" | b"#N/A"
            | b"#N/A N/A"
            | b"#NA"
            | b"-1.#IND"
            | b"-1.#QNAN"
            | b"-NaN"
            | b"-nan"
            | b"1.#IND"
            | b"1.#QNAN"
            | b"<NA>"
            | b"N/A"
            | b"NA"
            | b"NULL"
            | b"NaN"
            | b"None"
            | b"n/a"
            | b"nan"
            | b"null"
    )
}
669
/// Returns `true` when `field` and `expected` have the same length and are
/// equal byte-for-byte once ASCII letters are case-folded.
///
/// Non-ASCII bytes are compared exactly. Delegates to the standard library's
/// `<[u8]>::eq_ignore_ascii_case` instead of re-implementing it.
fn eq_ignore_ascii_case_bytes(field: &[u8], expected: &[u8]) -> bool {
    field.eq_ignore_ascii_case(expected)
}
677
/// Returns `true` when `field` contains a byte that rules out a plain integer
/// literal: a decimal point (`.`) or an exponent marker (`e` / `E`).
fn has_float_marker(field: &[u8]) -> bool {
    for &byte in field {
        if byte == b'.' || byte == b'e' || byte == b'E' {
            return true;
        }
    }
    false
}
681
/// Parses `field` as a base-10 `i64` using `str::parse`.
///
/// Returns `None` when `field` contains any non-ASCII byte or is not a valid
/// `i64` literal (empty, surrounded by whitespace, fractional, or out of
/// range).
fn parse_i64_ascii(field: &[u8]) -> Option<i64> {
    if !field.is_ascii() {
        return None;
    }
    let text = std::str::from_utf8(field).ok()?;
    text.parse::<i64>().ok()
}
689
690fn parse_f64_csv_number(field: &[u8]) -> Option<f64> {
691    match fast_float2::parse::<f64, _>(field) {
692        Ok(value) => Some(value),
693        Err(_) if field.is_ascii() => std::str::from_utf8(field).ok()?.parse::<f64>().ok(),
694        Err(_) => None,
695    }
696}
697
/// Exact powers of ten for the fused decimal fast path. Every entry up to
/// 10^18 is exactly representable in f64, so dividing an exact mantissa by an
/// entry is a single correctly-rounded operation.
///
/// Entry `i` holds `10^i`. The fused scanner indexes the table by the number
/// of fractional digits in a field, which it caps at 18 — hence 19 entries.
const FUSED_DECIMAL_POW10: [f64; 19] = [
    1e0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, 1e8, 1e9, 1e10, 1e11, 1e12, 1e13, 1e14, 1e15, 1e16,
    1e17, 1e18,
];
705
/// One CSV field admitted by the fused scanner/parser.
///
/// Produced by `fuse_scan_numeric_csv_field` and consumed by
/// `push_fused_numeric_csv_field`.
struct FusedNumericField {
    /// `Some` when the field is a plain optionally-signed integer; mirrors the
    /// `parse_i64_ascii` route of `push_csv_default_numeric_field`.
    /// `None` when the field contains a decimal point.
    int_value: Option<i64>,
    /// The field value as `f64`, bit-identical to `parse_f64_csv_number` on
    /// the same bytes (including the sign of zero, e.g. `-0` -> -0.0).
    float_value: f64,
    /// Index of the terminating delimiter (`,` or `\n`) or `data.len()`.
    /// The terminator itself is not consumed.
    end: usize,
}
717
718/// Scans one CSV field starting at `start`, fusing delimiter detection with
719/// decimal digit accumulation in a single pass over the bytes.
720///
721/// Admits only `[+-]?digits[.digits]` tokens with at most 18 mantissa digits
722/// (so the `u64` accumulator and the `i64` integer route can never overflow)
723/// and, for fractional values, a mantissa of at most 2^53 (so it is exactly
724/// representable in `f64`). Under those gates the computed value is provably
725/// bit-identical to the fallback parser: the mantissa and the power of ten
726/// are both exact in `f64`, so the single division (or `u64 -> f64`
727/// round-to-nearest-even conversion for integers) yields the correctly
728/// rounded value of the decimal token — the same contract `fast_float2` and
729/// `str::parse::<f64>` guarantee. Anything else (NA tokens, booleans,
730/// whitespace, exponents, overlong or malformed numbers, quotes, CR) returns
731/// `None` and must take the existing per-field fallback route.
732#[inline]
733fn fuse_scan_numeric_csv_field(data: &[u8], start: usize) -> Option<FusedNumericField> {
734    let mut pos = start;
735    let mut negative = false;
736    match data.get(pos) {
737        Some(b'-') => {
738            negative = true;
739            pos += 1;
740        }
741        Some(b'+') => pos += 1,
742        _ => {}
743    }
744
745    let mut mantissa: u64 = 0;
746    let mut digits = 0usize;
747    let mut frac_digits = 0usize;
748    let mut seen_dot = false;
749    loop {
750        match data.get(pos) {
751            Some(&byte @ b'0'..=b'9') => {
752                if digits == 18 {
753                    return None;
754                }
755                mantissa = mantissa * 10 + u64::from(byte - b'0');
756                digits += 1;
757                frac_digits += usize::from(seen_dot);
758                pos += 1;
759            }
760            Some(b'.') if !seen_dot => {
761                seen_dot = true;
762                pos += 1;
763            }
764            Some(b',' | b'\n') | None => break,
765            Some(_) => return None,
766        }
767    }
768
769    if digits == 0 || (seen_dot && frac_digits == 0) {
770        return None;
771    }
772
773    if seen_dot {
774        if mantissa > (1u64 << 53) {
775            return None;
776        }
777        let magnitude = mantissa as f64 / FUSED_DECIMAL_POW10[frac_digits];
778        Some(FusedNumericField {
779            int_value: None,
780            float_value: if negative { -magnitude } else { magnitude },
781            end: pos,
782        })
783    } else {
784        // digits <= 18 => mantissa < 10^18 < i64::MAX, so the cast is exact.
785        let int_magnitude = mantissa as i64;
786        let float_magnitude = mantissa as f64;
787        Some(FusedNumericField {
788            int_value: Some(if negative {
789                -int_magnitude
790            } else {
791                int_magnitude
792            }),
793            float_value: if negative {
794                -float_magnitude
795            } else {
796                float_magnitude
797            },
798            end: pos,
799        })
800    }
801}
802
803/// Pushes a fused numeric field with the exact dtype/promotion semantics of
804/// `push_csv_default_numeric_field`: integers keep an Int64 column while any
805/// fractional value promotes it to Float64; Float64 columns always take the
806/// `f64` form of the token (which is how the fallback parses them too).
807#[inline]
808fn push_fused_numeric_csv_field(values: &mut CsvTypedColumnValues, field: &FusedNumericField) {
809    match values {
810        CsvTypedColumnValues::Int64(out) => {
811            if let Some(value) = field.int_value {
812                out.push(value);
813            } else {
814                let mut promoted = Vec::with_capacity(out.capacity());
815                promoted.extend(out.iter().copied().map(|value| value as f64));
816                promoted.push(field.float_value);
817                *values = CsvTypedColumnValues::Float64(promoted);
818            }
819        }
820        CsvTypedColumnValues::Float64(out) => out.push(field.float_value),
821    }
822}
823
824fn push_csv_default_numeric_field(values: &mut CsvTypedColumnValues, field: &[u8]) -> bool {
825    if is_pandas_default_na_bytes(field) {
826        return false;
827    }
828
829    let trimmed = trim_ascii_field(field);
830    if trimmed.is_empty()
831        || eq_ignore_ascii_case_bytes(field, b"true")
832        || eq_ignore_ascii_case_bytes(field, b"false")
833    {
834        return false;
835    }
836
837    match values {
838        CsvTypedColumnValues::Int64(out) => {
839            if !has_float_marker(trimmed)
840                && let Some(value) = parse_i64_ascii(trimmed)
841            {
842                out.push(value);
843                true
844            } else {
845                match parse_f64_csv_number(trimmed) {
846                    Some(value) if !value.is_nan() => {
847                        let mut promoted = Vec::with_capacity(out.capacity());
848                        promoted.extend(out.iter().copied().map(|value| value as f64));
849                        promoted.push(value);
850                        *values = CsvTypedColumnValues::Float64(promoted);
851                        true
852                    }
853                    Some(_) | None => false,
854                }
855            }
856        }
857        CsvTypedColumnValues::Float64(out) => match parse_f64_csv_number(trimmed) {
858            Some(value) if !value.is_nan() => {
859                out.push(value);
860                true
861            }
862            Some(_) | None => false,
863        },
864    }
865}
866
867fn csv_default_unit_range_index(row_count: i64) -> Index {
868    let row_len = usize::try_from(row_count).expect("CSV row count must be non-negative");
869    Index::new_known_unique_int64_unit_range(0, row_len)
870}
871
872fn build_typed_numeric_csv_frame(
873    headers: &[String],
874    typed_columns: Vec<CsvTypedColumnValues>,
875    row_count: i64,
876) -> Result<DataFrame, IoError> {
877    let mut out_columns = BTreeMap::new();
878    let mut column_order = Vec::with_capacity(headers.len());
879    for (name, values) in headers.iter().cloned().zip(typed_columns) {
880        let column = match values {
881            CsvTypedColumnValues::Int64(values) => Column::from_i64_values(values),
882            CsvTypedColumnValues::Float64(values) => Column::from_f64_values(values),
883        };
884        out_columns.insert(name.clone(), column);
885        column_order.push(name);
886    }
887
888    let index = csv_default_unit_range_index(row_count);
889    DataFrame::new_with_column_order(index, out_columns, column_order).map_err(IoError::from)
890}
891
892fn simple_numeric_csv_parallel_worker_count(data_len: usize) -> usize {
893    if data_len < SIMPLE_NUMERIC_CSV_PARALLEL_MIN_BYTES {
894        return 1;
895    }
896    std::thread::available_parallelism()
897        .map_or(1, std::num::NonZeroUsize::get)
898        .min(SIMPLE_NUMERIC_CSV_PARALLEL_MAX_WORKERS)
899}
900
/// Splits `data` into roughly equal byte ranges that each end just after a
/// newline (or at the end of `data`).
///
/// Returns half-open `(start, end)` ranges covering `data` contiguously, or
/// `None` when fewer than two workers are requested, `data` is empty, no
/// newline follows a tentative boundary, or the split yields a single chunk.
fn split_simple_numeric_csv_chunks(
    data: &[u8],
    worker_count: usize,
) -> Option<Vec<(usize, usize)>> {
    if data.is_empty() || worker_count < 2 {
        return None;
    }

    let total = data.len();
    let stride = total.div_ceil(worker_count);
    let mut bounds = Vec::with_capacity(worker_count);
    let mut cursor = 0usize;
    while cursor < total {
        let tentative = cursor.saturating_add(stride).min(total);
        let boundary = if tentative == total {
            total
        } else {
            // Push the cut forward so it lands right after the next newline.
            let offset = data[tentative..].iter().position(|&byte| byte == b'\n')?;
            tentative + offset + 1
        };
        if boundary <= cursor {
            return None;
        }
        bounds.push((cursor, boundary));
        cursor = boundary;
    }

    if bounds.len() > 1 { Some(bounds) } else { None }
}
927
/// Parses a run of header-less CSV rows into typed numeric columns.
///
/// `data` must start at the beginning of a row. Every row must contain
/// exactly `header_count` comma-separated fields terminated by `\n` or the end
/// of `data`. Columns start as Int64 and are widened by the push helpers.
///
/// Returns the columns and the number of rows parsed, or `None` when the
/// chunk does not fit this shape: a quote, a bare `\r`, a field the numeric
/// parsers reject, a row with the wrong field count, or no rows at all.
fn parse_simple_numeric_csv_chunk(
    data: &[u8],
    header_count: usize,
) -> Option<(Vec<CsvTypedColumnValues>, i64)> {
    // Capacity guess only: assumes roughly eight bytes per field.
    let row_hint = data.len() / (header_count * 8).max(1);
    let mut typed_columns: Vec<CsvTypedColumnValues> = (0..header_count)
        .map(|_| CsvTypedColumnValues::Int64(Vec::with_capacity(row_hint)))
        .collect();
    let mut row_count: i64 = 0;
    let mut column_idx = 0usize;
    let mut pos = 0usize;

    while pos < data.len() {
        // Also guards the `typed_columns[column_idx]` indexing below.
        if column_idx >= header_count {
            return None;
        }
        // Fast route first: scan and parse the token in a single pass.
        let field_end = if let Some(field) = fuse_scan_numeric_csv_field(data, pos) {
            push_fused_numeric_csv_field(&mut typed_columns[column_idx], &field);
            field.end
        } else {
            // Fallback: locate the delimiter with the original abort rules
            // (quote or bare CR anywhere rejects the whole chunk) and route
            // the raw field through the general numeric field parser.
            let mut idx = pos;
            let end = loop {
                match data.get(idx) {
                    None | Some(b',' | b'\n') => break idx,
                    Some(b'"') => return None,
                    Some(b'\r') if data.get(idx + 1).copied() != Some(b'\n') => return None,
                    Some(_) => idx += 1,
                }
            };
            if !push_csv_default_numeric_field(&mut typed_columns[column_idx], &data[pos..end]) {
                return None;
            }
            end
        };

        match data.get(field_end).copied() {
            // More fields follow in this row; one comma too many is rejected.
            Some(b',') => {
                column_idx += 1;
                if column_idx >= header_count {
                    return None;
                }
            }
            // A newline or end-of-data both terminate the row.
            Some(b'\n') | None => {
                if column_idx + 1 != header_count {
                    return None;
                }
                row_count += 1;
                column_idx = 0;
            }
            // Unreachable: both field routes stop only at `,`/`\n`/EOF.
            Some(_) => return None,
        }
        // Step over the delimiter that ended the field.
        pos = field_end + 1;
    }

    // A chunk that ends mid-row (e.g. trailing comma) is malformed.
    if column_idx != 0 {
        return None;
    }

    (row_count > 0).then_some((typed_columns, row_count))
}
994
995/// Merges one output column from its per-chunk source vectors, in chunk
996/// order. Mirrors the original chunk-major merge arms exactly: Int64 sources
997/// append to an Int64 column or widen (`value as f64`) into a Float64 column;
998/// a Float64 source reaching an Int64 column is a contract violation (the
999/// promotion pre-scan forbids it) and rejects the merge.
1000fn merge_one_simple_numeric_csv_column(
1001    is_float: bool,
1002    capacity: usize,
1003    sources: Vec<CsvTypedColumnValues>,
1004) -> Option<CsvTypedColumnValues> {
1005    if is_float {
1006        let mut out = Vec::with_capacity(capacity);
1007        for src in sources {
1008            match src {
1009                CsvTypedColumnValues::Int64(src) => {
1010                    out.extend(src.into_iter().map(|value| value as f64));
1011                }
1012                CsvTypedColumnValues::Float64(src) => out.extend(src),
1013            }
1014        }
1015        Some(CsvTypedColumnValues::Float64(out))
1016    } else {
1017        let mut out = Vec::with_capacity(capacity);
1018        for src in sources {
1019            match src {
1020                CsvTypedColumnValues::Int64(src) => out.extend(src),
1021                CsvTypedColumnValues::Float64(_) => return None,
1022            }
1023        }
1024        Some(CsvTypedColumnValues::Int64(out))
1025    }
1026}
1027
/// Minimum total value count (rows times columns) before the chunk merge
/// fans out to threads; below this the scoped-spawn overhead outweighs the
/// copy it hides.
const SIMPLE_NUMERIC_CSV_PARALLEL_MERGE_MIN_VALUES: usize = 1 << 16;
1031
/// Merges per-chunk parse results into one set of columns, in chunk order.
///
/// A column becomes Float64 in the output if any chunk promoted it; otherwise
/// it stays Int64. Returns the merged columns and the summed row count, or
/// `None` when a chunk has the wrong column count, the row total overflows or
/// does not fit `usize`, a column merge is rejected, or a merge thread
/// panics.
fn merge_simple_numeric_csv_chunks(
    parsed_chunks: Vec<(Vec<CsvTypedColumnValues>, i64)>,
    header_count: usize,
) -> Option<(Vec<CsvTypedColumnValues>, i64)> {
    // Pre-scan: validate chunk shapes, total the rows, and decide each
    // column's final dtype before any data is moved.
    let mut final_is_float = vec![false; header_count];
    let mut row_count = 0i64;
    for (columns, rows) in &parsed_chunks {
        if columns.len() != header_count {
            return None;
        }
        row_count = row_count.checked_add(*rows)?;
        for (idx, column) in columns.iter().enumerate() {
            final_is_float[idx] |= matches!(column, CsvTypedColumnValues::Float64(_));
        }
    }

    let capacity = usize::try_from(row_count).ok()?;

    // Transpose chunk-major ownership to column-major: per_column[j] holds
    // column j's source vector from every chunk, in chunk order. This moves
    // only Vec headers — no value data is copied.
    let mut per_column: Vec<Vec<CsvTypedColumnValues>> = (0..header_count)
        .map(|_| Vec::with_capacity(parsed_chunks.len()))
        .collect();
    for (columns, _) in parsed_chunks {
        for (slot, column) in per_column.iter_mut().zip(columns) {
            slot.push(column);
        }
    }

    // Only fan out when there are at least two columns and enough values in
    // total; an overflowing product is treated as "not parallel".
    let parallel = header_count >= 2
        && capacity
            .checked_mul(header_count)
            .is_some_and(|total| total >= SIMPLE_NUMERIC_CSV_PARALLEL_MERGE_MIN_VALUES);
    if !parallel {
        let mut merged = Vec::with_capacity(header_count);
        for (is_float, sources) in final_is_float.iter().copied().zip(per_column) {
            merged.push(merge_one_simple_numeric_csv_column(
                is_float, capacity, sources,
            )?);
        }
        return Some((merged, row_count));
    }

    // Fan the per-column merges out over contiguous column groups so output
    // order is preserved by construction; each group thread runs the same
    // sequential per-column merge.
    let worker_count = header_count.min(SIMPLE_NUMERIC_CSV_PARALLEL_MAX_WORKERS);
    let group_size = header_count.div_ceil(worker_count);
    // Each group remembers the index of its first column so the thread can
    // look up the matching `final_is_float` flags.
    let mut groups: Vec<(usize, Vec<Vec<CsvTypedColumnValues>>)> = Vec::new();
    let mut group_start = 0usize;
    let mut remaining = per_column;
    while !remaining.is_empty() {
        let take = group_size.min(remaining.len());
        let rest = remaining.split_off(take);
        groups.push((group_start, remaining));
        group_start += take;
        remaining = rest;
    }

    // Rebind as a shared reference so the `move` closures copy the reference
    // instead of taking the vector.
    let final_is_float = &final_is_float;
    let merged_groups = std::thread::scope(|scope| {
        let handles: Vec<_> = groups
            .into_iter()
            .map(|(start, sources_group)| {
                scope.spawn(move || {
                    let mut merged_group = Vec::with_capacity(sources_group.len());
                    for (offset, sources) in sources_group.into_iter().enumerate() {
                        merged_group.push(merge_one_simple_numeric_csv_column(
                            final_is_float[start + offset],
                            capacity,
                            sources,
                        )?);
                    }
                    Some(merged_group)
                })
            })
            .collect();

        // Join in spawn order; a panicked or rejected group fails the merge.
        let mut merged_groups = Vec::with_capacity(handles.len());
        for handle in handles {
            merged_groups.push(handle.join().ok().flatten()?);
        }
        Some(merged_groups)
    })?;

    // Groups are contiguous and ordered, so concatenation restores the
    // original column order.
    let mut merged = Vec::with_capacity(header_count);
    for group in merged_groups {
        merged.extend(group);
    }

    Some((merged, row_count))
}
1125
1126fn parse_simple_numeric_csv_parallel_chunks(
1127    data: &[u8],
1128    header_count: usize,
1129    worker_count: usize,
1130) -> Option<(Vec<CsvTypedColumnValues>, i64)> {
1131    let chunks = split_simple_numeric_csv_chunks(data, worker_count)?;
1132    let parsed_chunks = std::thread::scope(|scope| {
1133        let handles: Vec<_> = chunks
1134            .iter()
1135            .map(|&(start, end)| {
1136                let chunk = &data[start..end];
1137                scope.spawn(move || parse_simple_numeric_csv_chunk(chunk, header_count))
1138            })
1139            .collect();
1140
1141        let mut parsed = Vec::with_capacity(handles.len());
1142        for handle in handles {
1143            let result = handle.join().ok().flatten()?;
1144            parsed.push(result);
1145        }
1146        Some(parsed)
1147    })?;
1148
1149    merge_simple_numeric_csv_chunks(parsed_chunks, header_count)
1150}
1151
1152fn try_read_csv_str_simple_typed_numeric_parallel(
1153    data: &[u8],
1154    headers: &[String],
1155    header_count: usize,
1156) -> Result<Option<DataFrame>, IoError> {
1157    let worker_count = simple_numeric_csv_parallel_worker_count(data.len());
1158    let Some((typed_columns, row_count)) =
1159        parse_simple_numeric_csv_parallel_chunks(data, header_count, worker_count)
1160    else {
1161        return Ok(None);
1162    };
1163
1164    build_typed_numeric_csv_frame(headers, typed_columns, row_count).map(Some)
1165}
1166
1167fn try_read_csv_str_simple_typed_numeric(
1168    input: &str,
1169    headers: &[String],
1170) -> Result<Option<DataFrame>, IoError> {
1171    let header_count = headers.len();
1172    if header_count == 0 {
1173        return Ok(None);
1174    }
1175
1176    let bytes = input.as_bytes();
1177    let Some(header_end) = bytes.iter().position(|byte| *byte == b'\n') else {
1178        return Ok(None);
1179    };
1180    if bytes[..header_end]
1181        .iter()
1182        .any(|byte| matches!(byte, b'"' | b'\r'))
1183    {
1184        return Ok(None);
1185    }
1186
1187    let data = &bytes[header_end + 1..];
1188    if data.is_empty() {
1189        return Ok(None);
1190    }
1191
1192    if let Some(frame) =
1193        try_read_csv_str_simple_typed_numeric_parallel(data, headers, header_count)?
1194    {
1195        return Ok(Some(frame));
1196    }
1197
1198    let Some((typed_columns, row_count)) = parse_simple_numeric_csv_chunk(data, header_count)
1199    else {
1200        return Ok(None);
1201    };
1202
1203    build_typed_numeric_csv_frame(headers, typed_columns, row_count).map(Some)
1204}
1205
1206fn try_read_csv_str_typed_numeric(
1207    input: &str,
1208    headers: &[String],
1209) -> Result<Option<DataFrame>, IoError> {
1210    let header_count = headers.len();
1211    if header_count == 0 {
1212        return Ok(None);
1213    }
1214
1215    let mut reader = ReaderBuilder::new()
1216        .has_headers(true)
1217        .from_reader(input.as_bytes());
1218    let _ = reader.headers().map_err(IoError::from)?;
1219
1220    let row_hint = input.len() / (header_count * 8).max(1);
1221    let mut typed_columns: Vec<CsvTypedColumnValues> = (0..header_count)
1222        .map(|_| CsvTypedColumnValues::Int64(Vec::with_capacity(row_hint)))
1223        .collect();
1224    let mut row_count: i64 = 0;
1225    for row in reader.byte_records() {
1226        let record = row?;
1227        for (idx, column) in typed_columns.iter_mut().enumerate() {
1228            let field = record.get(idx).unwrap_or_default();
1229            if !push_csv_default_numeric_field(column, field) {
1230                return Ok(None);
1231            }
1232        }
1233        row_count += 1;
1234    }
1235
1236    if row_count == 0 {
1237        return Ok(None);
1238    }
1239
1240    build_typed_numeric_csv_frame(headers, typed_columns, row_count).map(Some)
1241}
1242
1243fn try_read_csv_with_options_no_na_numeric_fast_path(
1244    input: &str,
1245) -> Result<Option<DataFrame>, IoError> {
1246    let mut reader = ReaderBuilder::new()
1247        .has_headers(true)
1248        .from_reader(input.as_bytes());
1249
1250    let headers_record = reader.headers().cloned().map_err(IoError::from)?;
1251    if headers_record.is_empty() {
1252        return Err(IoError::MissingHeaders);
1253    }
1254    let headers: Vec<String> = headers_record.iter().map(ToOwned::to_owned).collect();
1255    reject_duplicate_headers(&headers)?;
1256
1257    try_read_csv_str_simple_typed_numeric(input, &headers)
1258}
1259
/// Maximum number of entries kept in the CSV parse cache; older entries are
/// evicted from the back when a store exceeds it.
const CSV_PARSE_CACHE_MAX_ENTRIES: usize = 2;
/// Inputs longer than this many bytes are neither looked up in nor stored
/// into the CSV parse cache.
const CSV_PARSE_CACHE_MAX_INPUT_BYTES: usize = 32 * 1024 * 1024;
1262
/// Parse mode recorded with each cache entry; an entry only matches a lookup
/// made with the same mode.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
enum CsvParseCacheMode {
    /// Entries produced by `read_csv_str`.
    Default,
    // NOTE(review): presumably used by the options-based no-NA numeric fast
    // path; its call site is outside this section — confirm.
    NoNaNumeric,
}
1268
/// One cached parse result: the exact input text, the mode it was parsed
/// under, and the resulting frame.
#[derive(Clone)]
struct CsvParseCacheEntry {
    /// Parse mode the entry was stored under.
    mode: CsvParseCacheMode,
    /// Full input text, compared byte-for-byte on lookup.
    input: Arc<str>,
    // Arc so the cache critical section only bumps a refcount; the O(data)
    // DataFrame deep-clone happens OUTSIDE the global lock (concurrency
    // audit 2026-06-07: the under-lock clone serialized every concurrent
    // read_csv — including cache MISSES — behind one reader's deep clone).
    frame: Arc<DataFrame>,
}
1279
1280static CSV_PARSE_CACHE: OnceLock<Mutex<VecDeque<CsvParseCacheEntry>>> = OnceLock::new();
1281
1282fn csv_parse_cache() -> &'static Mutex<VecDeque<CsvParseCacheEntry>> {
1283    CSV_PARSE_CACHE.get_or_init(|| Mutex::new(VecDeque::new()))
1284}
1285
1286fn csv_parse_cache_entry_matches(
1287    entry: &CsvParseCacheEntry,
1288    mode: CsvParseCacheMode,
1289    input: &str,
1290) -> bool {
1291    entry.mode == mode
1292        && entry.input.len() == input.len()
1293        && entry.input.as_bytes() == input.as_bytes()
1294}
1295
1296fn csv_parse_cache_lookup(mode: CsvParseCacheMode, input: &str) -> Option<DataFrame> {
1297    if input.len() > CSV_PARSE_CACHE_MAX_INPUT_BYTES {
1298        return None;
1299    }
1300
1301    // Hold the lock only for the lookup + LRU bump (Arc refcount ops); the
1302    // deep clone for the caller happens after release.
1303    let shared: Arc<DataFrame> = {
1304        let mut cache = csv_parse_cache().lock().ok()?;
1305        let pos = cache
1306            .iter()
1307            .position(|entry| csv_parse_cache_entry_matches(entry, mode, input))?;
1308        if pos == 0 {
1309            Arc::clone(&cache.front()?.frame)
1310        } else {
1311            let entry = cache.remove(pos)?;
1312            let frame = Arc::clone(&entry.frame);
1313            cache.push_front(entry);
1314            frame
1315        }
1316    };
1317    Some((*shared).clone())
1318}
1319
1320fn csv_parse_cache_store(mode: CsvParseCacheMode, input: &str, frame: &DataFrame) {
1321    if input.len() > CSV_PARSE_CACHE_MAX_INPUT_BYTES {
1322        return;
1323    }
1324
1325    // Deep-clone into the Arc BEFORE taking the lock (see lookup).
1326    let shared = Arc::new(frame.clone());
1327    let owned_input = Arc::<str>::from(input);
1328
1329    let Ok(mut cache) = csv_parse_cache().lock() else {
1330        return;
1331    };
1332
1333    if let Some(pos) = cache
1334        .iter()
1335        .position(|entry| csv_parse_cache_entry_matches(entry, mode, input))
1336    {
1337        cache.remove(pos);
1338    }
1339
1340    cache.push_front(CsvParseCacheEntry {
1341        mode,
1342        input: owned_input,
1343        frame: shared,
1344    });
1345
1346    while cache.len() > CSV_PARSE_CACHE_MAX_ENTRIES {
1347        cache.pop_back();
1348    }
1349}
1350
1351fn read_csv_str_uncached(input: &str) -> Result<DataFrame, IoError> {
1352    let mut reader = ReaderBuilder::new()
1353        .has_headers(true)
1354        .from_reader(input.as_bytes());
1355
1356    let headers_record = reader.headers().cloned().map_err(IoError::from)?;
1357
1358    if headers_record.is_empty() {
1359        return Err(IoError::MissingHeaders);
1360    }
1361    let headers: Vec<String> = headers_record.iter().map(ToOwned::to_owned).collect();
1362    reject_duplicate_headers(&headers)?;
1363
1364    if let Some(frame) = try_read_csv_str_simple_typed_numeric(input, &headers)? {
1365        return Ok(frame);
1366    }
1367
1368    if let Some(frame) = try_read_csv_str_typed_numeric(input, &headers)? {
1369        return Ok(frame);
1370    }
1371
1372    // AG-07: Vec-based column accumulation (O(1) per cell vs O(log c) BTreeMap).
1373    // Capacity hint from byte length avoids reallocation for typical CSVs.
1374    let header_count = headers.len();
1375    let row_hint = input.len() / (header_count * 8).max(1);
1376    let mut columns: Vec<Vec<Scalar>> = (0..header_count)
1377        .map(|_| Vec::with_capacity(row_hint))
1378        .collect();
1379    // Keep each cell's original text so an object-fallback column can preserve
1380    // the verbatim literal like pandas (see build_csv_object_aware_column).
1381    let mut raw_columns: Vec<Vec<String>> = (0..header_count)
1382        .map(|_| Vec::with_capacity(row_hint))
1383        .collect();
1384
1385    let mut row_count: i64 = 0;
1386    for row in reader.records() {
1387        let record = row?;
1388        for idx in 0..header_count {
1389            let field = record.get(idx).unwrap_or_default();
1390            columns[idx].push(parse_scalar(field));
1391            raw_columns[idx].push(field.to_owned());
1392        }
1393        row_count += 1;
1394    }
1395
1396    let mut out_columns = BTreeMap::new();
1397    let mut column_order = Vec::with_capacity(header_count);
1398    for (idx, values) in columns.into_iter().enumerate() {
1399        let name = headers.get(idx).cloned().unwrap_or_default();
1400        let column = build_csv_object_aware_column(values, &raw_columns[idx])?;
1401        out_columns.insert(name.clone(), column);
1402        column_order.push(name);
1403    }
1404
1405    let index = csv_default_unit_range_index(row_count);
1406    Ok(DataFrame::new_with_column_order(
1407        index,
1408        out_columns,
1409        column_order,
1410    )?)
1411}
1412
1413pub fn read_csv_str(input: &str) -> Result<DataFrame, IoError> {
1414    if let Some(frame) = csv_parse_cache_lookup(CsvParseCacheMode::Default, input) {
1415        return Ok(frame);
1416    }
1417
1418    let frame = read_csv_str_uncached(input)?;
1419    csv_parse_cache_store(CsvParseCacheMode::Default, input, &frame);
1420    Ok(frame)
1421}
1422
1423pub fn write_csv_string(frame: &DataFrame) -> Result<String, IoError> {
1424    write_csv_string_with_options(frame, &CsvWriteOptions::default())
1425}
1426
1427pub fn write_markdown_string(frame: &DataFrame) -> Result<String, IoError> {
1428    write_markdown_string_with_options(frame, &MarkdownWriteOptions::default())
1429}
1430
1431pub fn write_latex_string(frame: &DataFrame) -> Result<String, IoError> {
1432    write_latex_string_with_options(frame, &LatexWriteOptions::default())
1433}
1434
1435pub fn write_html_string(frame: &DataFrame) -> Result<String, IoError> {
1436    write_html_string_with_options(frame, &HtmlWriteOptions::default())
1437}
1438
1439pub fn write_xml_string(frame: &DataFrame) -> Result<String, IoError> {
1440    write_xml_string_with_options(frame, &XmlWriteOptions::default())
1441}
1442
1443pub fn write_pickle_bytes(frame: &DataFrame) -> Result<Vec<u8>, IoError> {
1444    write_pickle_bytes_with_options(frame, &PickleWriteOptions::default())
1445}
1446
1447pub fn write_stata_bytes(frame: &DataFrame) -> Result<Vec<u8>, IoError> {
1448    write_stata_bytes_with_options(frame, &StataWriteOptions::default())
1449}
1450
/// Settings for CSV serialization.
///
/// Covers the pandas `DataFrame.to_csv` parameters that are independent of
/// file IO (the file layer is handled by `write_csv`).
#[derive(Debug, Clone)]
pub struct CsvWriteOptions {
    /// Byte placed between fields (pandas `sep`). Default: `,`.
    pub delimiter: u8,
    /// Text written in place of missing values (pandas `na_rep`).
    /// Default: `""`.
    pub na_rep: String,
    /// Whether the header row is written; `false` matches pandas
    /// `header=False`.
    pub header: bool,
    /// Whether the index is written as the first column (pandas `index`).
    pub include_index: bool,
    /// Header label for the index column (pandas `index_label`). When
    /// omitted, a named index uses its name and an unnamed index writes an
    /// empty header cell.
    pub index_label: Option<String>,
}

impl Default for CsvWriteOptions {
    fn default() -> Self {
        let delimiter = b',';
        let na_rep = String::default();
        Self {
            delimiter,
            na_rep,
            header: true,
            include_index: false,
            index_label: None,
        }
    }
}
1482
/// Settings for Markdown table serialization.
///
/// Covers the pure string subset of pandas `DataFrame.to_markdown`.
#[derive(Debug, Clone)]
pub struct MarkdownWriteOptions {
    /// Whether the index is written as the first column. Default: true.
    pub include_index: bool,
    /// Text written in place of missing values. Default: `"NaN"`.
    pub na_rep: String,
    /// Optional header label for the index column.
    pub index_label: Option<String>,
}

impl Default for MarkdownWriteOptions {
    fn default() -> Self {
        let na_rep = String::from("NaN");
        Self {
            include_index: true,
            na_rep,
            index_label: None,
        }
    }
}
1505
/// Settings for LaTeX table serialization.
///
/// Covers the pure string subset of pandas `DataFrame.to_latex`.
#[derive(Debug, Clone)]
pub struct LatexWriteOptions {
    /// Whether the index is written as the first column. Default: true.
    pub include_index: bool,
    /// Text written in place of missing values. Default: `"NaN"`.
    pub na_rep: String,
    /// Optional label for the index-name row.
    pub index_label: Option<String>,
    /// Whether LaTeX metacharacters in headers and cells are escaped.
    /// Default: false.
    pub escape: bool,
}

impl Default for LatexWriteOptions {
    fn default() -> Self {
        let na_rep = String::from("NaN");
        Self {
            include_index: true,
            na_rep,
            index_label: None,
            escape: false,
        }
    }
}
1531
/// Settings for HTML table serialization.
///
/// Covers the pure string subset of pandas `DataFrame.to_html`.
#[derive(Debug, Clone)]
pub struct HtmlWriteOptions {
    /// Whether the index is written as the first column. Default: true.
    pub include_index: bool,
    /// Text written in place of missing values (pandas `na_rep`).
    /// Default: `"NaN"`.
    pub na_rep: String,
    /// Extra CSS classes appended to pandas' default `dataframe` class.
    /// Entries may contain whitespace-separated class names.
    pub classes: Vec<String>,
    /// Optional `id` attribute for the `<table>` element.
    pub table_id: Option<String>,
    /// Optional border value. `Some(0)` and `None` omit the border attribute.
    /// Default: `Some(1)`.
    pub border: Option<u32>,
    /// Optional header text alignment. Defaults to pandas' `"right"`.
    pub justify: Option<String>,
    /// Whether HTML-sensitive characters in headers, index labels, and cells
    /// are escaped. Default: true.
    pub escape: bool,
    /// Whether URL-like string values are converted to anchors.
    /// Default: false.
    pub render_links: bool,
}

impl Default for HtmlWriteOptions {
    fn default() -> Self {
        let na_rep = String::from("NaN");
        let classes = Vec::default();
        Self {
            include_index: true,
            na_rep,
            classes,
            table_id: None,
            border: Some(1),
            justify: None,
            escape: true,
            render_links: false,
        }
    }
}
1571
/// Options controlling HTML table parsing.
///
/// Covers the single-table subset of pandas `read_html` for already-fetched
/// HTML strings and local files. Network fetching, JavaScript execution, and
/// rowspan/colspan expansion are intentionally out of scope for this slice.
#[derive(Debug, Clone, Default)]
pub struct HtmlReadOptions {
    /// Zero-based index of the table to parse. Default: `0` (the first
    /// table).
    pub table_index: usize,
}
1582
/// Pickle protocol used by [`write_pickle_bytes_with_options`].
///
/// Selected through [`PickleWriteOptions::protocol`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum PickleProtocol {
    /// Python pickle protocol 2, compatible with Python 2 and 3.
    V2,
    /// Python pickle protocol 3, the serde-pickle default.
    V3,
}
1591
1592/// Options controlling Pickle serialization.
1593///
1594/// This surface serializes a versioned FrankenPandas DataFrame envelope. It
1595/// does not try to emit arbitrary pandas Python objects.
1596#[derive(Debug, Clone, Copy, PartialEq, Eq)]
1597pub struct PickleWriteOptions {
1598    /// Pickle protocol to emit. Default: protocol 3.
1599    pub protocol: PickleProtocol,
1600}
1601
1602impl Default for PickleWriteOptions {
1603    fn default() -> Self {
1604        Self {
1605            protocol: PickleProtocol::V3,
1606        }
1607    }
1608}
1609
/// Options controlling Pickle deserialization.
///
/// The derived default leaves every flag off.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub struct PickleReadOptions {
    /// Decode legacy protocol 0-2 STRING opcodes as UTF-8. Default: false.
    pub decode_legacy_strings: bool,
}
1616
/// Default HDF5 group key used by [`read_hdf`] and [`write_hdf`].
pub const DEFAULT_HDF5_KEY: &str = "frame";

// NOTE(review): presumably the name of the dataset that holds the pickled
// DataFrame snapshot inside the keyed group; its uses are outside this
// section — confirm. Only compiled with the `hdf5` feature.
#[cfg(feature = "hdf5")]
const HDF5_PAYLOAD_DATASET: &str = "__frankenpandas_dataframe_pickle_v1";
1622
1623/// Options controlling HDF5 path reads.
1624///
1625/// The current HDF5 surface stores the versioned FrankenPandas DataFrame
1626/// snapshot envelope under a keyed group. This deliberately preserves index,
1627/// row multiindex, dtype, and null semantics before native PyTables-compatible
1628/// table layouts land.
1629#[derive(Debug, Clone, PartialEq, Eq)]
1630pub struct HdfReadOptions {
1631    /// HDF5 group key to read. Default: [`DEFAULT_HDF5_KEY`].
1632    pub key: String,
1633}
1634
1635impl Default for HdfReadOptions {
1636    fn default() -> Self {
1637        Self {
1638            key: DEFAULT_HDF5_KEY.to_owned(),
1639        }
1640    }
1641}
1642
1643/// Options controlling HDF5 path writes.
1644#[derive(Debug, Clone, PartialEq, Eq)]
1645pub struct HdfWriteOptions {
1646    /// HDF5 group key to write. Default: [`DEFAULT_HDF5_KEY`].
1647    pub key: String,
1648}
1649
1650impl Default for HdfWriteOptions {
1651    fn default() -> Self {
1652        Self {
1653            key: DEFAULT_HDF5_KEY.to_owned(),
1654        }
1655    }
1656}
1657
/// Settings for Stata DTA serialization.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct StataWriteOptions {
    /// Whether the DataFrame index is written as the first Stata variable.
    /// Default: true.
    pub include_index: bool,
    /// Optional index variable name. Default: `"index"`.
    pub index_label: Option<String>,
}

impl Default for StataWriteOptions {
    fn default() -> Self {
        let include_index = true;
        Self {
            include_index,
            index_label: None,
        }
    }
}
1675
/// Settings for serializing a DataFrame to XML.
///
/// Models the writer-only default shape of pandas `DataFrame.to_xml`.
#[derive(Debug, Clone)]
pub struct XmlWriteOptions {
    /// Emit the index as the first field of every row. Defaults to `true`.
    pub include_index: bool,
    /// Name of the XML root element. Defaults to `"data"`.
    pub root_name: String,
    /// Name of the element wrapping each row. Defaults to `"row"`.
    pub row_name: String,
    /// Name of the index element. When `None`, the index name is used, or
    /// pandas' default `"index"`.
    pub index_label: Option<String>,
}
1691
1692impl Default for XmlWriteOptions {
1693    fn default() -> Self {
1694        Self {
1695            include_index: true,
1696            root_name: "data".to_owned(),
1697            row_name: "row".to_owned(),
1698            index_label: None,
1699        }
1700    }
1701}
1702
/// Settings for parsing XML into a DataFrame.
///
/// Targets the row-oriented subset that pandas `DataFrame.to_xml` produces.
#[derive(Debug, Clone)]
pub struct XmlReadOptions {
    /// Name of the XML element that stands for one DataFrame row. Defaults
    /// to `"row"`.
    pub row_name: String,
}
1711
1712impl Default for XmlReadOptions {
1713    fn default() -> Self {
1714        Self {
1715            row_name: "row".to_owned(),
1716        }
1717    }
1718}
1719
1720/// Serialize a DataFrame to CSV with explicit options.
1721///
1722/// Matches `pd.DataFrame.to_csv(sep, na_rep, header, index, index_label)`
1723/// for the in-memory string form. Null and NaN-like values are
1724/// substituted with `options.na_rep`; all other scalars use the same
1725/// stringification as the default `write_csv_string`.
1726pub fn write_csv_string_with_options(
1727    frame: &DataFrame,
1728    options: &CsvWriteOptions,
1729) -> Result<String, IoError> {
1730    if options.include_index && frame.row_multiindex().is_some() {
1731        let materialized = materialize_named_row_multiindex_columns(frame)?;
1732        let mut nested_options = options.clone();
1733        nested_options.include_index = false;
1734        nested_options.index_label = None;
1735        return write_csv_string_with_options(&materialized, &nested_options);
1736    }
1737
1738    let mut writer = WriterBuilder::new()
1739        .delimiter(options.delimiter)
1740        .from_writer(Vec::new());
1741
1742    let headers = frame
1743        .column_names()
1744        .into_iter()
1745        .cloned()
1746        .collect::<Vec<_>>();
1747    if options.header {
1748        let mut header_row =
1749            Vec::with_capacity(headers.len() + if options.include_index { 1 } else { 0 });
1750        if options.include_index {
1751            header_row.push(resolve_csv_index_header(frame, options));
1752        }
1753        header_row.extend(headers.iter().cloned());
1754        writer.write_record(&header_row)?;
1755    }
1756
1757    // Pre-compute each datetime column's column-uniform to_csv format (pandas
1758    // renders a datetime column with one date-only/seconds/sub-second form).
1759    let dt_formats: Vec<Option<DatetimeCsvFormat>> = headers
1760        .iter()
1761        .map(|name| {
1762            frame.column(name).and_then(|column| {
1763                (column.dtype() == DType::Datetime64).then(|| datetime_csv_format(column))
1764            })
1765        })
1766        .collect();
1767    // A DatetimeIndex follows the same column-uniform rule when written.
1768    let index_dt_format = if options.include_index {
1769        index_datetime_csv_format(frame)
1770    } else {
1771        None
1772    };
1773
1774    for row_idx in 0..frame.index().len() {
1775        let mut row = Vec::with_capacity(headers.len() + if options.include_index { 1 } else { 0 });
1776        if options.include_index {
1777            row.push(index_label_csv_string(frame, row_idx, index_dt_format)?);
1778        }
1779        row.extend(headers.iter().enumerate().map(|(col_idx, name)| {
1780            let value = frame.column(name).and_then(|column| column.value(row_idx));
1781            match value {
1782                Some(scalar) => scalar_to_csv_cell(scalar, &options.na_rep, dt_formats[col_idx]),
1783                None => options.na_rep.clone(),
1784            }
1785        }));
1786        writer.write_record(&row)?;
1787    }
1788
1789    let bytes = writer.into_inner().map_err(|err| err.into_error())?;
1790    Ok(String::from_utf8(bytes)?)
1791}
1792
1793/// Serialize a DataFrame to a GitHub-style Markdown table.
1794///
1795/// This covers pandas' pure formatter path without taking a dependency on
1796/// Python's optional `tabulate` package.
1797pub fn write_markdown_string_with_options(
1798    frame: &DataFrame,
1799    options: &MarkdownWriteOptions,
1800) -> Result<String, IoError> {
1801    if options.include_index && frame.row_multiindex().is_some() {
1802        let materialized = materialize_named_row_multiindex_columns(frame)?;
1803        let mut nested_options = options.clone();
1804        nested_options.include_index = false;
1805        nested_options.index_label = None;
1806        return write_markdown_string_with_options(&materialized, &nested_options);
1807    }
1808
1809    let headers = frame
1810        .column_names()
1811        .into_iter()
1812        .cloned()
1813        .collect::<Vec<_>>();
1814    let table_width = headers.len() + usize::from(options.include_index);
1815    let mut out = String::new();
1816
1817    let mut header_row = Vec::with_capacity(table_width);
1818    if options.include_index {
1819        header_row.push(resolve_table_index_header(
1820            frame,
1821            options.index_label.as_deref(),
1822        ));
1823    }
1824    header_row.extend(headers.iter().cloned());
1825    push_markdown_row(&mut out, &header_row);
1826
1827    let separator = vec!["---".to_owned(); table_width];
1828    push_markdown_row(&mut out, &separator);
1829
1830    for row_idx in 0..frame.index().len() {
1831        let mut row = Vec::with_capacity(table_width);
1832        if options.include_index {
1833            row.push(index_label_string(frame, row_idx)?);
1834        }
1835        row.extend(headers.iter().map(|name| {
1836            let value = frame.column(name).and_then(|column| column.value(row_idx));
1837            match value {
1838                Some(scalar) => scalar_to_table_with_na(scalar, &options.na_rep),
1839                None => options.na_rep.clone(),
1840            }
1841        }));
1842        push_markdown_row(&mut out, &row);
1843    }
1844
1845    Ok(out)
1846}
1847
1848/// Serialize a DataFrame to a booktabs-compatible LaTeX tabular block.
1849pub fn write_latex_string_with_options(
1850    frame: &DataFrame,
1851    options: &LatexWriteOptions,
1852) -> Result<String, IoError> {
1853    if options.include_index && frame.row_multiindex().is_some() {
1854        let materialized = materialize_named_row_multiindex_columns(frame)?;
1855        let mut nested_options = options.clone();
1856        nested_options.include_index = false;
1857        nested_options.index_label = None;
1858        return write_latex_string_with_options(&materialized, &nested_options);
1859    }
1860
1861    let headers = frame
1862        .column_names()
1863        .into_iter()
1864        .cloned()
1865        .collect::<Vec<_>>();
1866    let table_width = headers.len() + usize::from(options.include_index);
1867    let mut out = String::new();
1868
1869    out.push_str("\\begin{tabular}{");
1870    out.push_str(&"l".repeat(table_width));
1871    out.push_str("}\n\\toprule\n");
1872
1873    let mut header_row = Vec::with_capacity(table_width);
1874    if options.include_index {
1875        header_row.push(String::new());
1876    }
1877    header_row.extend(headers.iter().cloned());
1878    push_latex_row(&mut out, &header_row, options.escape);
1879
1880    if options.include_index {
1881        let index_name = resolve_table_index_header(frame, options.index_label.as_deref());
1882        if !index_name.is_empty() {
1883            let mut index_name_row = Vec::with_capacity(table_width);
1884            index_name_row.push(index_name);
1885            index_name_row.extend(std::iter::repeat_n(String::new(), headers.len()));
1886            push_latex_row(&mut out, &index_name_row, options.escape);
1887        }
1888    }
1889
1890    out.push_str("\\midrule\n");
1891
1892    for row_idx in 0..frame.index().len() {
1893        let mut row = Vec::with_capacity(table_width);
1894        if options.include_index {
1895            row.push(index_label_string(frame, row_idx)?);
1896        }
1897        row.extend(headers.iter().map(|name| {
1898            let value = frame.column(name).and_then(|column| column.value(row_idx));
1899            match value {
1900                Some(scalar) => scalar_to_latex_cell(scalar, &options.na_rep),
1901                None => options.na_rep.clone(),
1902            }
1903        }));
1904        push_latex_row(&mut out, &row, options.escape);
1905    }
1906
1907    out.push_str("\\bottomrule\n\\end{tabular}\n");
1908    Ok(out)
1909}
1910
1911/// Serialize a DataFrame to an HTML table string.
1912pub fn write_html_string_with_options(
1913    frame: &DataFrame,
1914    options: &HtmlWriteOptions,
1915) -> Result<String, IoError> {
1916    if options.include_index && frame.row_multiindex().is_some() {
1917        let materialized = materialize_named_row_multiindex_columns(frame)?;
1918        let nested_options = HtmlWriteOptions {
1919            include_index: false,
1920            ..options.clone()
1921        };
1922        return write_html_string_with_options(&materialized, &nested_options);
1923    }
1924
1925    write_html_table_string(frame, options)
1926}
1927
1928fn write_html_table_string(
1929    frame: &DataFrame,
1930    options: &HtmlWriteOptions,
1931) -> Result<String, IoError> {
1932    let mut out = String::new();
1933    push_html_table_open(&mut out, options);
1934    out.push_str("  <thead>\n    <tr style=\"text-align: ");
1935    out.push_str(&escape_html_attr(
1936        options.justify.as_deref().unwrap_or("right"),
1937    ));
1938    out.push_str(";\">\n");
1939
1940    if options.include_index {
1941        out.push_str("      <th></th>\n");
1942    }
1943    for name in frame.column_names() {
1944        out.push_str("      <th>");
1945        out.push_str(&html_text(name, options.escape));
1946        out.push_str("</th>\n");
1947    }
1948    out.push_str("    </tr>\n  </thead>\n  <tbody>\n");
1949
1950    for row_idx in 0..frame.index().len() {
1951        out.push_str("    <tr>\n");
1952        if options.include_index {
1953            out.push_str("      <th>");
1954            out.push_str(&html_index_label_string(frame, row_idx, options.escape)?);
1955            out.push_str("</th>\n");
1956        }
1957        for name in frame.column_names() {
1958            let value = frame.column(name).and_then(|column| column.value(row_idx));
1959            out.push_str("      <td>");
1960            match value {
1961                Some(scalar) => out.push_str(&html_scalar_string(scalar, options)),
1962                None => out.push_str(&html_text(&options.na_rep, options.escape)),
1963            }
1964            out.push_str("</td>\n");
1965        }
1966        out.push_str("    </tr>\n");
1967    }
1968
1969    out.push_str("  </tbody>\n</table>");
1970    Ok(out)
1971}
1972
1973fn push_html_table_open(out: &mut String, options: &HtmlWriteOptions) {
1974    out.push_str("<table");
1975    if let Some(border) = options.border.filter(|border| *border > 0) {
1976        out.push_str(" border=\"");
1977        out.push_str(&border.to_string());
1978        out.push('"');
1979    }
1980    out.push_str(" class=\"");
1981    out.push_str(&html_class_attr(&options.classes));
1982    out.push('"');
1983    if let Some(table_id) = options
1984        .table_id
1985        .as_deref()
1986        .map(str::trim)
1987        .filter(|table_id| !table_id.is_empty())
1988    {
1989        out.push_str(" id=\"");
1990        out.push_str(&escape_html_attr(table_id));
1991        out.push('"');
1992    }
1993    out.push_str(">\n");
1994}
1995
1996fn html_class_attr(classes: &[String]) -> String {
1997    std::iter::once("dataframe".to_owned())
1998        .chain(
1999            classes
2000                .iter()
2001                .flat_map(|class| class.split_whitespace())
2002                .filter(|class| !class.is_empty())
2003                .map(escape_html_attr),
2004        )
2005        .collect::<Vec<_>>()
2006        .join(" ")
2007}
2008
2009fn html_index_label_string(
2010    frame: &DataFrame,
2011    row_idx: usize,
2012    escape: bool,
2013) -> Result<String, IoError> {
2014    let label = frame
2015        .index()
2016        .labels()
2017        .get(row_idx)
2018        .ok_or_else(|| IoError::Html(format!("missing index label at row {row_idx}")))?;
2019    let raw = match label {
2020        IndexLabel::Int64(v) => v.to_string(),
2021        IndexLabel::Utf8(s) => s.clone(),
2022        IndexLabel::Timedelta64(ns) => Timedelta::format(*ns),
2023        IndexLabel::Datetime64(ns) => format_datetime_ns(*ns),
2024        IndexLabel::Null(_) => label.to_string(),
2025    };
2026    Ok(html_text(&raw, escape))
2027}
2028
2029fn html_scalar_string(scalar: &Scalar, options: &HtmlWriteOptions) -> String {
2030    match scalar {
2031        Scalar::Null(_) => html_text(&options.na_rep, options.escape),
2032        Scalar::Bool(value) => html_text(if *value { "True" } else { "False" }, options.escape),
2033        Scalar::Int64(value) => value.to_string(),
2034        Scalar::Float64(value) => {
2035            if value.is_nan() {
2036                html_text(&options.na_rep, options.escape)
2037            } else if value.fract() == 0.0 {
2038                format!("{value:.1}")
2039            } else {
2040                value.to_string()
2041            }
2042        }
2043        Scalar::Utf8(value) => {
2044            if options.render_links && is_html_renderable_link(value) {
2045                let label = html_text(value, options.escape);
2046                format!(
2047                    "<a href=\"{}\" target=\"_blank\">{label}</a>",
2048                    escape_html_attr(value)
2049                )
2050            } else {
2051                html_text(value, options.escape)
2052            }
2053        }
2054        Scalar::Timedelta64(value) => {
2055            if *value == Timedelta::NAT {
2056                html_text(&options.na_rep, options.escape)
2057            } else {
2058                html_text(&Timedelta::format(*value), options.escape)
2059            }
2060        }
2061        Scalar::Datetime64(value) => {
2062            if *value == Timestamp::NAT {
2063                html_text(&options.na_rep, options.escape)
2064            } else {
2065                html_text(&format_datetime_ns(*value), options.escape)
2066            }
2067        }
2068        Scalar::Period(value) => {
2069            if *value == i64::MIN {
2070                html_text(&options.na_rep, options.escape)
2071            } else {
2072                html_text(&format!("Period[{value}]"), options.escape)
2073            }
2074        }
2075        Scalar::Interval(iv) => html_text(&format!("{iv}"), options.escape),
2076    }
2077}
2078
2079fn html_text(value: &str, escape: bool) -> String {
2080    if escape {
2081        escape_html_text(value)
2082    } else {
2083        value.to_owned()
2084    }
2085}
2086
/// Report whether `value` starts with one of the link schemes rendered as an
/// anchor: `http://`, `https://`, or `ftp://` (case-sensitive).
fn is_html_renderable_link(value: &str) -> bool {
    ["http://", "https://", "ftp://"]
        .iter()
        .any(|scheme| value.starts_with(*scheme))
}
2090
/// Escape `value` for use as HTML text content.
///
/// Replaces `&`, `<` and `>` with their entities; every other character,
/// quotes included, is copied unchanged.
fn escape_html_text(value: &str) -> String {
    value
        .chars()
        .fold(String::with_capacity(value.len()), |mut escaped, ch| {
            match ch {
                '&' => escaped.push_str("&amp;"),
                '<' => escaped.push_str("&lt;"),
                '>' => escaped.push_str("&gt;"),
                other => escaped.push(other),
            }
            escaped
        })
}
2103
/// Escape `value` for use inside a double-quoted HTML attribute.
///
/// Replaces `&`, `"`, `<` and `>` with their entities. Single quotes are
/// left untouched, so the result is only safe inside double-quoted
/// attributes.
fn escape_html_attr(value: &str) -> String {
    let mut escaped = String::with_capacity(value.len());
    for ch in value.chars() {
        let entity = match ch {
            '&' => "&amp;",
            '"' => "&quot;",
            '<' => "&lt;",
            '>' => "&gt;",
            plain => {
                escaped.push(plain);
                continue;
            }
        };
        escaped.push_str(entity);
    }
    escaped
}
2117
2118/// Parse a DataFrame from the first HTML table in a document string.
2119///
2120/// This is the local, table-oriented subset of pandas `read_html`: it parses
2121/// static HTML with an HTML5 parser, uses the first `<thead><tr>` as headers
2122/// when present, otherwise uses the first row with header cells, and fills
2123/// short body rows with nulls.
2124pub fn read_html_str(input: &str) -> Result<DataFrame, IoError> {
2125    read_html_str_with_options(input, &HtmlReadOptions::default())
2126}
2127
2128/// Parse a DataFrame from an HTML document string with options.
2129pub fn read_html_str_with_options(
2130    input: &str,
2131    options: &HtmlReadOptions,
2132) -> Result<DataFrame, IoError> {
2133    let document = Html::parse_document(input);
2134    let table_selector = html_selector("table")?;
2135    let row_selector = html_selector("tr")?;
2136    let thead_row_selector = html_selector("thead tr")?;
2137    let tbody_row_selector = html_selector("tbody tr")?;
2138    let cell_selector = html_selector("th, td")?;
2139    let th_selector = html_selector("th")?;
2140
2141    let table = document
2142        .select(&table_selector)
2143        .nth(options.table_index)
2144        .ok_or_else(|| {
2145            IoError::Html(format!(
2146                "html input contains no table at index {}",
2147                options.table_index
2148            ))
2149        })?;
2150
2151    let header_rows = table
2152        .select(&thead_row_selector)
2153        .map(|row| html_row_cells(row, &cell_selector))
2154        .filter(|cells| !cells.is_empty())
2155        .collect::<Vec<_>>();
2156    let body_rows = table
2157        .select(&tbody_row_selector)
2158        .map(|row| html_row_cells(row, &cell_selector))
2159        .filter(|cells| !cells.is_empty())
2160        .collect::<Vec<_>>();
2161
2162    if let Some(header_cells) = header_rows.first() {
2163        let headers = normalize_html_headers(header_cells)?;
2164        return html_rows_to_frame(headers, body_rows);
2165    }
2166
2167    let all_rows = table
2168        .select(&row_selector)
2169        .map(|row| {
2170            let has_header_cell = row.select(&th_selector).next().is_some();
2171            (has_header_cell, html_row_cells(row, &cell_selector))
2172        })
2173        .filter(|(_, cells)| !cells.is_empty())
2174        .collect::<Vec<_>>();
2175    if all_rows.is_empty() {
2176        return Err(IoError::Html(
2177            "html table contains no rows with cells".to_owned(),
2178        ));
2179    }
2180
2181    let mut all_rows = all_rows.into_iter();
2182    let (first_has_header, first_cells) = all_rows
2183        .next()
2184        .ok_or_else(|| IoError::Html("html table contains no rows with cells".to_owned()))?;
2185
2186    if first_has_header {
2187        let headers = normalize_html_headers(&first_cells)?;
2188        let data_rows = all_rows.map(|(_, cells)| cells).collect::<Vec<_>>();
2189        html_rows_to_frame(headers, data_rows)
2190    } else {
2191        let mut data_rows = vec![first_cells];
2192        data_rows.extend(all_rows.map(|(_, cells)| cells));
2193        let width = data_rows.iter().map(Vec::len).max().unwrap_or(0);
2194        if width == 0 {
2195            return Err(IoError::Html("html table contains no cells".to_owned()));
2196        }
2197        let headers = (0..width).map(|idx| idx.to_string()).collect::<Vec<_>>();
2198        html_rows_to_frame(headers, data_rows)
2199    }
2200}
2201
/// Envelope key whose value identifies the FrankenPandas pickle format.
const PICKLE_FORMAT_KEY: &str = "__frankenpandas_pickle_format";
/// Format marker written under [`PICKLE_FORMAT_KEY`]; the only value the
/// reader accepts.
const PICKLE_FORMAT_VERSION: &str = "frankenpandas.dataframe.v1";
/// Envelope key naming the JSON orient of the payload.
const PICKLE_ORIENT_KEY: &str = "orient";
/// Envelope key holding the serialized DataFrame itself.
const PICKLE_PAYLOAD_KEY: &str = "payload";
2206
2207/// Serialize a DataFrame to Pickle bytes.
2208///
2209/// This emits a fail-closed FrankenPandas envelope containing the existing
2210/// split-orient DataFrame representation. It is intentionally narrower than
2211/// pandas' arbitrary Python-object pickle support.
2212pub fn write_pickle_bytes_with_options(
2213    frame: &DataFrame,
2214    options: &PickleWriteOptions,
2215) -> Result<Vec<u8>, IoError> {
2216    let split_json = write_json_string(frame, JsonOrient::Split)?;
2217    let split_value = serde_json::from_str::<serde_json::Value>(&split_json)?;
2218    let mut envelope = serde_json::Map::new();
2219    envelope.insert(
2220        PICKLE_FORMAT_KEY.to_owned(),
2221        serde_json::Value::String(PICKLE_FORMAT_VERSION.to_owned()),
2222    );
2223    envelope.insert(
2224        PICKLE_ORIENT_KEY.to_owned(),
2225        serde_json::Value::String("split".to_owned()),
2226    );
2227    envelope.insert(PICKLE_PAYLOAD_KEY.to_owned(), split_value);
2228
2229    serde_pickle::to_vec(
2230        &serde_json::Value::Object(envelope),
2231        pickle_ser_options(options),
2232    )
2233    .map_err(|err| IoError::Pickle(err.to_string()))
2234}
2235
2236/// Deserialize a DataFrame from Pickle bytes.
2237pub fn read_pickle_bytes(input: &[u8]) -> Result<DataFrame, IoError> {
2238    read_pickle_bytes_with_options(input, &PickleReadOptions::default())
2239}
2240
2241/// Deserialize a DataFrame from Pickle bytes with options.
2242///
2243/// Only the versioned FrankenPandas envelope is accepted. Foreign Python
2244/// pickles fail closed with [`IoError::Pickle`].
2245pub fn read_pickle_bytes_with_options(
2246    input: &[u8],
2247    options: &PickleReadOptions,
2248) -> Result<DataFrame, IoError> {
2249    let value = serde_pickle::from_slice::<serde_json::Value>(input, pickle_de_options(options))
2250        .map_err(|err| IoError::Pickle(err.to_string()))?;
2251    let envelope = value
2252        .as_object()
2253        .ok_or_else(|| IoError::Pickle("pickle payload must be an object".to_owned()))?;
2254
2255    match envelope
2256        .get(PICKLE_FORMAT_KEY)
2257        .and_then(|value| value.as_str())
2258    {
2259        Some(PICKLE_FORMAT_VERSION) => {}
2260        Some(other) => {
2261            return Err(IoError::Pickle(format!(
2262                "unsupported FrankenPandas pickle format '{other}'"
2263            )));
2264        }
2265        None => {
2266            return Err(IoError::Pickle(
2267                "pickle payload is missing FrankenPandas format marker".to_owned(),
2268            ));
2269        }
2270    }
2271
2272    match envelope
2273        .get(PICKLE_ORIENT_KEY)
2274        .and_then(|value| value.as_str())
2275    {
2276        Some("split") => {}
2277        Some(other) => {
2278            return Err(IoError::Pickle(format!(
2279                "unsupported FrankenPandas pickle orient '{other}'"
2280            )));
2281        }
2282        None => {
2283            return Err(IoError::Pickle(
2284                "pickle payload is missing orient".to_owned(),
2285            ));
2286        }
2287    }
2288
2289    let payload = envelope
2290        .get(PICKLE_PAYLOAD_KEY)
2291        .ok_or_else(|| IoError::Pickle("pickle payload is missing data".to_owned()))?;
2292    let payload_json = serde_json::to_string(payload)?;
2293    read_json_str(&payload_json, JsonOrient::Split)
2294}
2295
2296fn pickle_ser_options(options: &PickleWriteOptions) -> serde_pickle::SerOptions {
2297    match options.protocol {
2298        PickleProtocol::V2 => serde_pickle::SerOptions::new().proto_v2(),
2299        PickleProtocol::V3 => serde_pickle::SerOptions::new(),
2300    }
2301}
2302
2303fn pickle_de_options(options: &PickleReadOptions) -> serde_pickle::DeOptions {
2304    let de_options = serde_pickle::DeOptions::new();
2305    if options.decode_legacy_strings {
2306        de_options.decode_strings()
2307    } else {
2308        de_options
2309    }
2310}
2311
2312#[derive(Debug, Clone)]
2313struct StataField {
2314    variable_name: String,
2315    source: StataFieldSource,
2316    variable_type: VariableType,
2317}
2318
/// Origin of the values written for a [`StataField`].
#[derive(Debug, Clone)]
enum StataFieldSource {
    /// Values come from the DataFrame index.
    Index,
    /// Values come from the DataFrame column with this name.
    Column(String),
}
2324
2325/// Serialize a DataFrame to Stata DTA bytes.
2326///
2327/// This first slice targets DTA release 118 and a DataFrame-oriented subset:
2328/// integer/bool, float, fixed string, and missing values.
2329pub fn write_stata_bytes_with_options(
2330    frame: &DataFrame,
2331    options: &StataWriteOptions,
2332) -> Result<Vec<u8>, IoError> {
2333    let fields = stata_fields_for_frame(frame, options)?;
2334    let header = Header::builder(Release::V118, ByteOrder::LittleEndian).build();
2335    let mut schema = StataSchema::builder();
2336    for field in &fields {
2337        let format = stata_format_for_type(field.variable_type);
2338        schema = schema.add_variable(
2339            Variable::builder(field.variable_type, &field.variable_name).format(format),
2340        );
2341    }
2342    let schema = schema.build().map_err(stata_error)?;
2343
2344    let mut record_writer = DtaWriter::new()
2345        .from_writer(Cursor::new(Vec::<u8>::new()))
2346        .write_header(header)
2347        .map_err(stata_error)?
2348        .write_schema(schema)
2349        .map_err(stata_error)?
2350        .into_record_writer()
2351        .map_err(stata_error)?;
2352
2353    for row_idx in 0..frame.index().len() {
2354        let mut record = Vec::with_capacity(fields.len());
2355        for field in &fields {
2356            record.push(stata_value_for_field(frame, row_idx, field)?);
2357        }
2358        record_writer.write_record(&record).map_err(stata_error)?;
2359    }
2360
2361    Ok(record_writer
2362        .into_long_string_writer()
2363        .map_err(stata_error)?
2364        .into_value_label_writer()
2365        .map_err(stata_error)?
2366        .finish()
2367        .map_err(stata_error)?
2368        .into_inner())
2369}
2370
2371/// Read a DataFrame from Stata DTA bytes.
2372pub fn read_stata_bytes(input: &[u8]) -> Result<DataFrame, IoError> {
2373    let mut characteristic_reader = DtaReader::new()
2374        .from_reader(Cursor::new(input))
2375        .read_header()
2376        .map_err(stata_error)?
2377        .read_schema()
2378        .map_err(stata_error)?;
2379    characteristic_reader.skip_to_end().map_err(stata_error)?;
2380
2381    let mut record_reader = characteristic_reader
2382        .into_record_reader()
2383        .map_err(stata_error)?;
2384    let column_order = record_reader
2385        .schema()
2386        .variables()
2387        .iter()
2388        .map(|variable| variable.name().to_owned())
2389        .collect::<Vec<_>>();
2390    reject_duplicate_headers(&column_order)?;
2391
2392    let mut columns = column_order
2393        .iter()
2394        .cloned()
2395        .map(|name| (name, Vec::new()))
2396        .collect::<BTreeMap<_, _>>();
2397    let mut row_count: i64 = 0;
2398    while let Some(record) = record_reader.read_record().map_err(stata_error)? {
2399        for (name, value) in column_order.iter().zip(record.values()) {
2400            columns
2401                .get_mut(name)
2402                .ok_or_else(|| IoError::Stata(format!("missing Stata column '{name}'")))?
2403                .push(stata_value_to_scalar(value)?);
2404        }
2405        row_count = row_count
2406            .checked_add(1)
2407            .ok_or_else(|| IoError::Stata("Stata row count exceeded i64 range".to_owned()))?;
2408    }
2409
2410    let mut out = BTreeMap::new();
2411    for name in &column_order {
2412        let values = columns
2413            .remove(name)
2414            .ok_or_else(|| IoError::Stata(format!("missing Stata column '{name}'")))?;
2415        out.insert(name.clone(), Column::from_values(values)?);
2416    }
2417    Ok(DataFrame::new_with_column_order(
2418        Index::from_i64((0..row_count).collect()),
2419        out,
2420        column_order,
2421    )?)
2422}
2423
2424fn stata_fields_for_frame(
2425    frame: &DataFrame,
2426    options: &StataWriteOptions,
2427) -> Result<Vec<StataField>, IoError> {
2428    let mut fields = Vec::new();
2429    if options.include_index {
2430        let name = options
2431            .index_label
2432            .clone()
2433            .unwrap_or_else(|| "index".to_owned());
2434        validate_stata_variable_name(&name)?;
2435        fields.push(StataField {
2436            variable_name: name,
2437            source: StataFieldSource::Index,
2438            variable_type: stata_index_variable_type(frame)?,
2439        });
2440    }
2441
2442    for name in frame.column_names() {
2443        validate_stata_variable_name(name)?;
2444        let column = frame
2445            .column(name)
2446            .ok_or_else(|| IoError::Stata(format!("missing DataFrame column '{name}'")))?;
2447        fields.push(StataField {
2448            variable_name: name.clone(),
2449            source: StataFieldSource::Column(name.clone()),
2450            variable_type: infer_stata_variable_type(column, name)?,
2451        });
2452    }
2453
2454    let mut seen = BTreeSet::new();
2455    for field in &fields {
2456        if !seen.insert(field.variable_name.clone()) {
2457            return Err(IoError::DuplicateColumnName(field.variable_name.clone()));
2458        }
2459    }
2460    Ok(fields)
2461}
2462
2463fn validate_stata_variable_name(name: &str) -> Result<(), IoError> {
2464    if name.is_empty() {
2465        return Err(IoError::Stata(
2466            "Stata variable name cannot be empty".to_owned(),
2467        ));
2468    }
2469    if name.len() > 32 {
2470        return Err(IoError::Stata(format!(
2471            "Stata variable name '{name}' exceeds 32 bytes"
2472        )));
2473    }
2474    let mut chars = name.chars();
2475    let first = chars
2476        .next()
2477        .ok_or_else(|| IoError::Stata("Stata variable name cannot be empty".to_owned()))?;
2478    if !(first == '_' || first.is_ascii_alphabetic()) {
2479        return Err(IoError::Stata(format!(
2480            "invalid Stata variable name '{name}': first character must be ASCII letter or '_'"
2481        )));
2482    }
2483    if !chars.all(|ch| ch == '_' || ch.is_ascii_alphanumeric()) {
2484        return Err(IoError::Stata(format!(
2485            "invalid Stata variable name '{name}': only ASCII letters, digits, and '_' are supported"
2486        )));
2487    }
2488    Ok(())
2489}
2490
2491fn stata_index_variable_type(frame: &DataFrame) -> Result<VariableType, IoError> {
2492    let max_len = frame
2493        .index()
2494        .labels()
2495        .iter()
2496        .map(|label| label.to_string().len())
2497        .max()
2498        .unwrap_or(1)
2499        .max(1);
2500    stata_fixed_string_type(max_len, "index")
2501}
2502
2503fn infer_stata_variable_type(column: &Column, name: &str) -> Result<VariableType, IoError> {
2504    let mut saw_numeric = false;
2505    let mut saw_float = false;
2506    let mut saw_string = false;
2507    let mut max_string_len = 1usize;
2508
2509    for value in column.values() {
2510        match value {
2511            Scalar::Null(_) => {}
2512            Scalar::Bool(_) => {
2513                saw_numeric = true;
2514            }
2515            Scalar::Int64(v) => {
2516                saw_numeric = true;
2517                if i32::try_from(*v).is_err() {
2518                    return Err(IoError::Stata(format!(
2519                        "Stata long column '{name}' cannot encode i64 value {v}"
2520                    )));
2521                }
2522            }
2523            Scalar::Float64(v) => {
2524                if !v.is_nan() {
2525                    saw_numeric = true;
2526                    saw_float = true;
2527                }
2528            }
2529            Scalar::Utf8(text) => {
2530                saw_string = true;
2531                max_string_len = max_string_len.max(text.len());
2532            }
2533            other => {
2534                saw_string = true;
2535                max_string_len = max_string_len.max(scalar_to_table_with_na(other, "").len());
2536            }
2537        }
2538    }
2539
2540    if saw_string {
2541        stata_fixed_string_type(max_string_len, name)
2542    } else if saw_numeric && !saw_float {
2543        Ok(VariableType::Long)
2544    } else {
2545        Ok(VariableType::Double)
2546    }
2547}
2548
2549fn stata_fixed_string_type(len: usize, name: &str) -> Result<VariableType, IoError> {
2550    let width = len.max(1);
2551    let width = u16::try_from(width).map_err(|_| {
2552        IoError::Stata(format!(
2553            "Stata string column '{name}' exceeds fixed string capacity"
2554        ))
2555    })?;
2556    if width > 2045 {
2557        return Err(IoError::Stata(format!(
2558            "Stata string column '{name}' requires strL; this slice supports fixed strings only"
2559        )));
2560    }
2561    Ok(VariableType::FixedString(width))
2562}
2563
2564fn stata_format_for_type(variable_type: VariableType) -> &'static str {
2565    match variable_type {
2566        VariableType::Byte | VariableType::Int | VariableType::Long => "%12.0g",
2567        VariableType::Float | VariableType::Double => "%10.0g",
2568        VariableType::FixedString(_) | VariableType::LongString => "%9s",
2569    }
2570}
2571
2572fn stata_value_for_field(
2573    frame: &DataFrame,
2574    row_idx: usize,
2575    field: &StataField,
2576) -> Result<StataValue<'static>, IoError> {
2577    match field.source {
2578        StataFieldSource::Index => Ok(StataValue::String(std::borrow::Cow::Owned(
2579            index_label_string(frame, row_idx)?,
2580        ))),
2581        StataFieldSource::Column(ref name) => {
2582            let value = frame.column(name).and_then(|column| column.value(row_idx));
2583            scalar_to_stata_value(value, field.variable_type, name)
2584        }
2585    }
2586}
2587
2588fn scalar_to_stata_value(
2589    value: Option<&Scalar>,
2590    variable_type: VariableType,
2591    name: &str,
2592) -> Result<StataValue<'static>, IoError> {
2593    match variable_type {
2594        VariableType::Long => match value {
2595            Some(Scalar::Bool(v)) => Ok(StataValue::Long(StataLong::Present(i32::from(*v)))),
2596            Some(Scalar::Int64(v)) => Ok(StataValue::Long(StataLong::Present(
2597                i32::try_from(*v).map_err(|_| {
2598                    IoError::Stata(format!("Stata long column '{name}' cannot encode {v}"))
2599                })?,
2600            ))),
2601            Some(Scalar::Null(_)) | None => {
2602                Ok(StataValue::Long(StataLong::Missing(MissingValue::System)))
2603            }
2604            Some(other) => Err(IoError::Stata(format!(
2605                "Stata long column '{name}' cannot encode {other:?}"
2606            ))),
2607        },
2608        VariableType::Double => match value {
2609            Some(Scalar::Bool(v)) => Ok(StataValue::Double(StataDouble::Present(if *v {
2610                1.0
2611            } else {
2612                0.0
2613            }))),
2614            Some(Scalar::Int64(v)) => Ok(StataValue::Double(StataDouble::Present(*v as f64))),
2615            Some(Scalar::Float64(v)) if v.is_nan() => Ok(StataValue::Double(StataDouble::Missing(
2616                MissingValue::System,
2617            ))),
2618            Some(Scalar::Float64(v)) => Ok(StataValue::Double(StataDouble::Present(*v))),
2619            Some(Scalar::Null(_)) | None => Ok(StataValue::Double(StataDouble::Missing(
2620                MissingValue::System,
2621            ))),
2622            Some(other) => Err(IoError::Stata(format!(
2623                "Stata double column '{name}' cannot encode {other:?}"
2624            ))),
2625        },
2626        VariableType::FixedString(_) => {
2627            let text = match value {
2628                Some(Scalar::Null(_)) | None => String::new(),
2629                Some(scalar) => scalar_to_table_with_na(scalar, ""),
2630            };
2631            Ok(StataValue::String(std::borrow::Cow::Owned(text)))
2632        }
2633        VariableType::Byte | VariableType::Int | VariableType::Float | VariableType::LongString => {
2634            Err(IoError::Stata(format!(
2635                "unsupported Stata variable type for column '{name}': {variable_type:?}"
2636            )))
2637        }
2638    }
2639}
2640
2641fn stata_value_to_scalar(value: &StataValue<'_>) -> Result<Scalar, IoError> {
2642    match value {
2643        StataValue::Byte(v) => Ok(v
2644            .present()
2645            .map(|value| Scalar::Int64(i64::from(value)))
2646            .unwrap_or(Scalar::Null(NullKind::NaN))),
2647        StataValue::Int(v) => Ok(v
2648            .present()
2649            .map(|value| Scalar::Int64(i64::from(value)))
2650            .unwrap_or(Scalar::Null(NullKind::NaN))),
2651        StataValue::Long(v) => Ok(v
2652            .present()
2653            .map(|value| Scalar::Int64(i64::from(value)))
2654            .unwrap_or(Scalar::Null(NullKind::NaN))),
2655        StataValue::Float(v) => Ok(v
2656            .present()
2657            .map(|value| Scalar::Float64(f64::from(value)))
2658            .unwrap_or(Scalar::Null(NullKind::NaN))),
2659        StataValue::Double(v) => Ok(v
2660            .present()
2661            .map(Scalar::Float64)
2662            .unwrap_or(Scalar::Null(NullKind::NaN))),
2663        StataValue::String(text) => Ok(Scalar::Utf8(text.to_string())),
2664        StataValue::LongStringRef(_) => Err(IoError::Stata(
2665            "Stata strL values are not supported by this reader slice".to_owned(),
2666        )),
2667    }
2668}
2669
2670fn stata_error<E: std::fmt::Display>(err: E) -> IoError {
2671    IoError::Stata(err.to_string())
2672}
2673
2674/// Parse a DataFrame from a row-oriented XML document string.
2675///
2676/// Matches the writer-oriented subset accepted by `pd.read_xml(...,
2677/// parser="etree")`: each row is an element named by
2678/// [`XmlReadOptions::row_name`], and each direct child element becomes a
2679/// DataFrame column. Attributes, XPath, namespaces, and nested field elements
2680/// are intentionally out of scope for this slice.
2681pub fn read_xml_str(input: &str) -> Result<DataFrame, IoError> {
2682    read_xml_str_with_options(input, &XmlReadOptions::default())
2683}
2684
2685/// Parse a DataFrame from a row-oriented XML document string with options.
2686pub fn read_xml_str_with_options(
2687    input: &str,
2688    options: &XmlReadOptions,
2689) -> Result<DataFrame, IoError> {
2690    validate_xml_element_name(&options.row_name)?;
2691
2692    let mut reader = XmlReader::from_str(input);
2693    reader.config_mut().trim_text(false);
2694    let mut buf = Vec::new();
2695    let mut rows: Vec<BTreeMap<String, Scalar>> = Vec::new();
2696    let mut column_order = Vec::new();
2697    let mut seen_columns = HashSet::new();
2698    let mut current_row: Option<BTreeMap<String, Scalar>> = None;
2699    let mut current_field: Option<String> = None;
2700    let mut field_text = String::new();
2701    let mut xml_version = XmlVersion::Implicit1_0;
2702
2703    loop {
2704        match reader
2705            .read_event_into(&mut buf)
2706            .map_err(|err| IoError::Xml(err.to_string()))?
2707        {
2708            Event::Start(event) => {
2709                let name = xml_event_name(event.name())?;
2710                if current_row.is_none() {
2711                    if name == options.row_name {
2712                        current_row = Some(BTreeMap::new());
2713                    }
2714                } else if let Some(field_name) = &current_field {
2715                    return Err(IoError::Xml(format!(
2716                        "nested xml element '{name}' inside field '{field_name}' is unsupported"
2717                    )));
2718                } else {
2719                    current_field = Some(name);
2720                    field_text.clear();
2721                }
2722            }
2723            Event::Empty(event) => {
2724                let name = xml_event_name(event.name())?;
2725                if let Some(field_name) = &current_field {
2726                    return Err(IoError::Xml(format!(
2727                        "nested xml element '{name}' inside field '{field_name}' is unsupported"
2728                    )));
2729                }
2730                if let Some(row) = current_row.as_mut() {
2731                    insert_xml_field(
2732                        row,
2733                        &mut column_order,
2734                        &mut seen_columns,
2735                        name,
2736                        Scalar::Null(NullKind::Null),
2737                    )?;
2738                } else if name == options.row_name {
2739                    rows.push(BTreeMap::new());
2740                }
2741            }
2742            Event::Text(event) => {
2743                if current_field.is_some() {
2744                    let decoded = event
2745                        .xml_content(xml_version)
2746                        .map_err(|err| IoError::Xml(err.to_string()))?;
2747                    field_text.push_str(&decoded);
2748                }
2749            }
2750            Event::CData(event) => {
2751                if current_field.is_some() {
2752                    let decoded = event
2753                        .xml_content(xml_version)
2754                        .map_err(|err| IoError::Xml(err.to_string()))?;
2755                    field_text.push_str(&decoded);
2756                }
2757            }
2758            Event::End(event) => {
2759                let name = xml_event_name(event.name())?;
2760                if let Some(field_name) = current_field.as_ref() {
2761                    if name != *field_name {
2762                        return Err(IoError::Xml(format!(
2763                            "xml field '{field_name}' closed by mismatched element '{name}'"
2764                        )));
2765                    }
2766                    let field_name = current_field.take().expect("field checked");
2767                    let value = parse_scalar(&field_text);
2768                    field_text.clear();
2769                    let row = current_row
2770                        .as_mut()
2771                        .ok_or_else(|| IoError::Xml("xml field outside row".to_owned()))?;
2772                    insert_xml_field(row, &mut column_order, &mut seen_columns, field_name, value)?;
2773                } else if name == options.row_name {
2774                    let row = current_row.take().ok_or_else(|| {
2775                        IoError::Xml("xml row closed before it opened".to_owned())
2776                    })?;
2777                    rows.push(row);
2778                }
2779            }
2780            Event::GeneralRef(reference) => {
2781                if current_field.is_some() {
2782                    field_text.push_str(&decode_xml_general_ref(reference)?);
2783                }
2784            }
2785            Event::Eof => break,
2786            Event::Decl(decl) => {
2787                if let Ok(v) = decl.version() {
2788                    xml_version = match v.as_ref() {
2789                        b"1.0" => XmlVersion::Explicit1_0,
2790                        b"1.1" => XmlVersion::Explicit1_1,
2791                        _ => xml_version,
2792                    };
2793                }
2794            }
2795            Event::PI(_) | Event::DocType(_) | Event::Comment(_) => {}
2796        }
2797        buf.clear();
2798    }
2799
2800    if current_field.is_some() || current_row.is_some() {
2801        return Err(IoError::Xml(
2802            "xml document ended inside an open row or field".to_owned(),
2803        ));
2804    }
2805    if rows.is_empty() {
2806        return Err(IoError::Xml(
2807            "xml input contains no row elements".to_owned(),
2808        ));
2809    }
2810
2811    let mut out_columns = BTreeMap::new();
2812    for name in &column_order {
2813        let values = rows
2814            .iter()
2815            .map(|row| {
2816                row.get(name)
2817                    .cloned()
2818                    .unwrap_or(Scalar::Null(NullKind::Null))
2819            })
2820            .collect::<Vec<_>>();
2821        out_columns.insert(name.clone(), Column::from_values(values)?);
2822    }
2823    let index = Index::from_i64((0..rows.len() as i64).collect());
2824    Ok(DataFrame::new_with_column_order(
2825        index,
2826        out_columns,
2827        column_order,
2828    )?)
2829}
2830
2831/// Serialize a DataFrame to an XML document string.
2832pub fn write_xml_string_with_options(
2833    frame: &DataFrame,
2834    options: &XmlWriteOptions,
2835) -> Result<String, IoError> {
2836    if options.include_index && frame.row_multiindex().is_some() {
2837        let materialized = materialize_named_row_multiindex_columns(frame)?;
2838        let mut nested_options = options.clone();
2839        nested_options.include_index = false;
2840        nested_options.index_label = None;
2841        return write_xml_string_with_options(&materialized, &nested_options);
2842    }
2843
2844    validate_xml_element_name(&options.root_name)?;
2845    validate_xml_element_name(&options.row_name)?;
2846
2847    let headers = frame
2848        .column_names()
2849        .into_iter()
2850        .cloned()
2851        .collect::<Vec<_>>();
2852    for name in &headers {
2853        validate_xml_element_name(name)?;
2854    }
2855
2856    let index_label = options
2857        .index_label
2858        .clone()
2859        .or_else(|| frame.index().name().map(ToOwned::to_owned))
2860        .unwrap_or_else(|| "index".to_owned());
2861    if options.include_index {
2862        validate_xml_element_name(&index_label)?;
2863    }
2864
2865    let mut out = String::new();
2866    out.push_str("<?xml version=\"1.0\" encoding=\"utf-8\"?>\n");
2867    out.push('<');
2868    out.push_str(&options.root_name);
2869    out.push_str(">\n");
2870
2871    for row_idx in 0..frame.index().len() {
2872        out.push_str("  <");
2873        out.push_str(&options.row_name);
2874        out.push_str(">\n");
2875
2876        if options.include_index {
2877            let value = index_label_string(frame, row_idx)?;
2878            push_xml_field(&mut out, &index_label, Some(&value));
2879        }
2880
2881        for name in &headers {
2882            let value = frame
2883                .column(name)
2884                .and_then(|column| column.value(row_idx))
2885                .and_then(scalar_to_xml_value);
2886            push_xml_field(&mut out, name, value.as_deref());
2887        }
2888
2889        out.push_str("  </");
2890        out.push_str(&options.row_name);
2891        out.push_str(">\n");
2892    }
2893
2894    out.push_str("</");
2895    out.push_str(&options.root_name);
2896    out.push_str(">\n");
2897    Ok(out)
2898}
2899
2900fn xml_event_name(name: quick_xml::name::QName<'_>) -> Result<String, IoError> {
2901    std::str::from_utf8(name.as_ref())
2902        .map(ToOwned::to_owned)
2903        .map_err(|err| IoError::Xml(format!("invalid utf-8 xml element name: {err}")))
2904}
2905
2906fn decode_xml_general_ref(reference: quick_xml::events::BytesRef<'_>) -> Result<String, IoError> {
2907    let raw = std::str::from_utf8(reference.as_ref())
2908        .map_err(|err| IoError::Xml(format!("invalid utf-8 xml entity reference: {err}")))?;
2909    match raw {
2910        "amp" => Ok("&".to_owned()),
2911        "lt" => Ok("<".to_owned()),
2912        "gt" => Ok(">".to_owned()),
2913        "quot" => Ok("\"".to_owned()),
2914        "apos" => Ok("'".to_owned()),
2915        _ if raw.starts_with("#x") => {
2916            let value = u32::from_str_radix(&raw[2..], 16)
2917                .map_err(|err| IoError::Xml(format!("invalid hex xml entity '&{raw};': {err}")))?;
2918            char::from_u32(value)
2919                .map(|ch| ch.to_string())
2920                .ok_or_else(|| IoError::Xml(format!("invalid unicode xml entity '&{raw};'")))
2921        }
2922        _ if raw.starts_with('#') => {
2923            let value = raw[1..].parse::<u32>().map_err(|err| {
2924                IoError::Xml(format!("invalid decimal xml entity '&{raw};': {err}"))
2925            })?;
2926            char::from_u32(value)
2927                .map(|ch| ch.to_string())
2928                .ok_or_else(|| IoError::Xml(format!("invalid unicode xml entity '&{raw};'")))
2929        }
2930        _ => Err(IoError::Xml(format!(
2931            "unsupported xml entity reference '&{raw};'"
2932        ))),
2933    }
2934}
2935
2936fn insert_xml_field(
2937    row: &mut BTreeMap<String, Scalar>,
2938    column_order: &mut Vec<String>,
2939    seen_columns: &mut HashSet<String>,
2940    name: String,
2941    value: Scalar,
2942) -> Result<(), IoError> {
2943    if row.insert(name.clone(), value).is_some() {
2944        return Err(IoError::Xml(format!("duplicate xml field '{name}' in row")));
2945    }
2946    if seen_columns.insert(name.clone()) {
2947        column_order.push(name);
2948    }
2949    Ok(())
2950}
2951
2952fn validate_xml_element_name(name: &str) -> Result<(), IoError> {
2953    let mut chars = name.chars();
2954    let Some(first) = chars.next() else {
2955        return Err(IoError::Xml(
2956            "xml element name must be non-empty".to_owned(),
2957        ));
2958    };
2959    let valid_first = first == '_' || first.is_ascii_alphabetic();
2960    let valid_rest =
2961        chars.all(|ch| ch == '_' || ch == '-' || ch == '.' || ch.is_ascii_alphanumeric());
2962    if valid_first && valid_rest {
2963        Ok(())
2964    } else {
2965        Err(IoError::Xml(format!("invalid xml element name '{name}'")))
2966    }
2967}
2968
2969fn push_xml_field(out: &mut String, name: &str, value: Option<&str>) {
2970    out.push_str("    <");
2971    out.push_str(name);
2972    match value {
2973        Some(value) => {
2974            out.push('>');
2975            out.push_str(&escape_xml_text(value));
2976            out.push_str("</");
2977            out.push_str(name);
2978            out.push_str(">\n");
2979        }
2980        None => out.push_str("/>\n"),
2981    }
2982}
2983
/// Escape `value` for use as XML element text.
///
/// `&`, `<`, and `>` become entity references. Line endings are normalized:
/// a lone `\r` and a `\r\n` pair each become a single `\n`.
fn escape_xml_text(value: &str) -> String {
    let mut out = String::with_capacity(value.len());
    // True while the previous character was a carriage return, so the `\n` of
    // a `\r\n` pair can be dropped (the `\r` already emitted the newline).
    let mut after_cr = false;

    for ch in value.chars() {
        let follows_cr = std::mem::replace(&mut after_cr, ch == '\r');
        match ch {
            '\n' if follows_cr => {}
            '\r' => out.push('\n'),
            '&' => out.push_str("&amp;"),
            '<' => out.push_str("&lt;"),
            '>' => out.push_str("&gt;"),
            other => out.push(other),
        }
    }
    out
}
3003
3004fn scalar_to_xml_value(scalar: &Scalar) -> Option<String> {
3005    match scalar {
3006        Scalar::Null(_) => None,
3007        Scalar::Bool(value) => Some(if *value { "True" } else { "False" }.to_owned()),
3008        Scalar::Int64(value) => Some(value.to_string()),
3009        Scalar::Float64(value) => {
3010            if value.is_nan() {
3011                None
3012            } else if value.is_finite() && *value == value.round() && value.abs() < 1e15 {
3013                Some(format!("{value:.1}"))
3014            } else {
3015                Some(value.to_string())
3016            }
3017        }
3018        Scalar::Utf8(value) => Some(value.clone()),
3019        Scalar::Timedelta64(value) => {
3020            if *value == Timedelta::NAT {
3021                None
3022            } else {
3023                Some(Timedelta::format(*value))
3024            }
3025        }
3026        Scalar::Datetime64(value) => {
3027            if *value == Timestamp::NAT {
3028                None
3029            } else {
3030                Some(format_datetime_ns(*value))
3031            }
3032        }
3033        Scalar::Period(value) => {
3034            if *value == i64::MIN {
3035                None
3036            } else {
3037                Some(format!("Period[{value}]"))
3038            }
3039        }
3040        Scalar::Interval(iv) => Some(format!("{iv}")),
3041    }
3042}
3043
3044fn html_selector(pattern: &str) -> Result<Selector, IoError> {
3045    Selector::parse(pattern).map_err(|err| {
3046        IoError::Html(format!(
3047            "invalid built-in html selector {pattern:?}: {err:?}"
3048        ))
3049    })
3050}
3051
3052fn html_row_cells(row: ElementRef<'_>, cell_selector: &Selector) -> Vec<String> {
3053    row.select(cell_selector)
3054        .map(|cell| cell.text().collect::<String>().trim().to_owned())
3055        .collect()
3056}
3057
3058fn normalize_html_headers(raw_headers: &[String]) -> Result<Vec<String>, IoError> {
3059    if raw_headers.is_empty() {
3060        return Err(IoError::Html(
3061            "html table header row contains no cells".to_owned(),
3062        ));
3063    }
3064
3065    let mut seen = HashSet::new();
3066    let mut headers = Vec::with_capacity(raw_headers.len());
3067    for (idx, raw) in raw_headers.iter().enumerate() {
3068        let name = if raw.trim().is_empty() {
3069            format!("Unnamed: {idx}")
3070        } else {
3071            raw.trim().to_owned()
3072        };
3073        if !seen.insert(name.clone()) {
3074            return Err(IoError::DuplicateColumnName(name));
3075        }
3076        headers.push(name);
3077    }
3078    Ok(headers)
3079}
3080
3081fn html_rows_to_frame(
3082    column_order: Vec<String>,
3083    rows: Vec<Vec<String>>,
3084) -> Result<DataFrame, IoError> {
3085    let width = column_order.len();
3086    if width == 0 {
3087        return Err(IoError::Html(
3088            "html table must contain at least one column".to_owned(),
3089        ));
3090    }
3091
3092    let mut values_by_column = column_order
3093        .iter()
3094        .map(|name| (name.clone(), Vec::with_capacity(rows.len())))
3095        .collect::<BTreeMap<_, _>>();
3096    for (row_idx, row) in rows.iter().enumerate() {
3097        if row.len() > width {
3098            return Err(IoError::Html(format!(
3099                "html row {row_idx} has {} cells but header has {width}",
3100                row.len()
3101            )));
3102        }
3103        for (col_idx, name) in column_order.iter().enumerate() {
3104            let value = row
3105                .get(col_idx)
3106                .map_or(Scalar::Null(NullKind::Null), |cell| parse_scalar(cell));
3107            let column_values = values_by_column.get_mut(name).ok_or_else(|| {
3108                IoError::Html(format!("html column '{name}' was not initialized"))
3109            })?;
3110            column_values.push(value);
3111        }
3112    }
3113
3114    let mut columns = BTreeMap::new();
3115    for name in &column_order {
3116        let values = values_by_column
3117            .remove(name)
3118            .ok_or_else(|| IoError::Html(format!("html column '{name}' has no values")))?;
3119        columns.insert(name.clone(), Column::from_values(values)?);
3120    }
3121    let row_count = i64::try_from(rows.len()).map_err(|_| {
3122        IoError::Html(format!(
3123            "html table row count {} exceeds supported i64 index range",
3124            rows.len()
3125        ))
3126    })?;
3127    Ok(DataFrame::new_with_column_order(
3128        Index::from_i64((0..row_count).collect()),
3129        columns,
3130        column_order,
3131    )?)
3132}
3133
3134fn resolve_csv_index_header(frame: &DataFrame, options: &CsvWriteOptions) -> String {
3135    options
3136        .index_label
3137        .clone()
3138        .or_else(|| frame.index().name().map(ToOwned::to_owned))
3139        .unwrap_or_default()
3140}
3141
3142fn resolve_table_index_header(frame: &DataFrame, index_label: Option<&str>) -> String {
3143    index_label
3144        .map(ToOwned::to_owned)
3145        .or_else(|| frame.index().name().map(ToOwned::to_owned))
3146        .unwrap_or_default()
3147}
3148
3149fn index_label_string(frame: &DataFrame, row_idx: usize) -> Result<String, IoError> {
3150    frame
3151        .index()
3152        .labels()
3153        .get(row_idx)
3154        .map(ToString::to_string)
3155        .ok_or_else(|| {
3156            IoError::Frame(FrameError::CompatibilityRejected(format!(
3157                "index position {row_idx} out of bounds for index length {}",
3158                frame.index().len()
3159            )))
3160        })
3161}
3162
3163/// Derive the column-uniform `to_csv` datetime spec for a DatetimeIndex.
3164///
3165/// Returns `None` unless every label is `Datetime64` (a homogeneous datetime
3166/// index), so non-datetime / mixed indexes keep their default rendering.
3167fn index_datetime_csv_format(frame: &DataFrame) -> Option<DatetimeCsvFormat> {
3168    let labels = frame.index().labels();
3169    let mut any_datetime = false;
3170    let mut date_only = true;
3171    let mut subsec_digits = 0u8;
3172    for label in labels {
3173        let IndexLabel::Datetime64(ns) = label else {
3174            return None;
3175        };
3176        any_datetime = true;
3177        if *ns == i64::MIN {
3178            continue; // NaT
3179        }
3180        let subsec = (ns.rem_euclid(1_000_000_000)) as u32;
3181        if ns.div_euclid(1_000_000_000).rem_euclid(86_400) != 0 || subsec != 0 {
3182            date_only = false;
3183        }
3184        if subsec != 0 {
3185            let digits = if !subsec.is_multiple_of(1_000) {
3186                9
3187            } else if !subsec.is_multiple_of(1_000_000) {
3188                6
3189            } else {
3190                3
3191            };
3192            subsec_digits = subsec_digits.max(digits);
3193        }
3194    }
3195    any_datetime.then_some(DatetimeCsvFormat {
3196        date_only,
3197        subsec_digits,
3198    })
3199}
3200
3201/// CSV index-cell formatter: applies a DatetimeIndex's column-uniform spec when
3202/// present, otherwise identical to `index_label_string`.
3203fn index_label_csv_string(
3204    frame: &DataFrame,
3205    row_idx: usize,
3206    dt_format: Option<DatetimeCsvFormat>,
3207) -> Result<String, IoError> {
3208    if let Some(fmt) = dt_format
3209        && let Some(IndexLabel::Datetime64(ns)) = frame.index().labels().get(row_idx)
3210        && *ns != i64::MIN
3211    {
3212        return Ok(format_datetime_csv(*ns, fmt));
3213    }
3214    index_label_string(frame, row_idx)
3215}
3216
/// Column-uniform datetime rendering spec for `to_csv`, mirroring pandas.
///
/// pandas chooses one format for the whole datetime column: a date-only
/// `YYYY-MM-DD` when every value falls exactly on midnight, otherwise
/// `YYYY-MM-DD HH:MM:SS` with a fractional-seconds suffix whose width is the
/// column's finest sub-second resolution (3 = ms, 6 = µs, 9 = ns; 0 = none).
#[derive(Clone, Copy)]
struct DatetimeCsvFormat {
    /// Render only the `YYYY-MM-DD` part; takes precedence over
    /// `subsec_digits` when formatting.
    date_only: bool,
    /// Number of fractional-second digits to append (0 appends nothing).
    subsec_digits: u8,
}
3228
3229/// Scan a Datetime64 column and derive its column-uniform `to_csv` format.
3230fn datetime_csv_format(column: &Column) -> DatetimeCsvFormat {
3231    let mut date_only = true;
3232    let mut subsec_digits = 0u8;
3233    for value in column.values() {
3234        let Scalar::Datetime64(ns) = value else {
3235            continue;
3236        };
3237        if *ns == Timestamp::NAT {
3238            continue;
3239        }
3240        let subsec = (ns.rem_euclid(1_000_000_000)) as u32;
3241        // Any non-midnight time-of-day OR any sub-second component forces the
3242        // full timestamp form for the entire column.
3243        if ns.div_euclid(1_000_000_000).rem_euclid(86_400) != 0 || subsec != 0 {
3244            date_only = false;
3245        }
3246        if subsec != 0 {
3247            let digits = if !subsec.is_multiple_of(1_000) {
3248                9
3249            } else if !subsec.is_multiple_of(1_000_000) {
3250                6
3251            } else {
3252                3
3253            };
3254            subsec_digits = subsec_digits.max(digits);
3255        }
3256    }
3257    DatetimeCsvFormat {
3258        date_only,
3259        subsec_digits,
3260    }
3261}
3262
3263/// Format one datetime (ns since epoch) under a column's `to_csv` spec.
3264fn format_datetime_csv(nanos: i64, fmt: DatetimeCsvFormat) -> String {
3265    // format_datetime_ns yields "YYYY-MM-DD HH:MM:SS" (ASCII, 19 chars for the
3266    // i64-ns datetime range, year always 4 digits). Trim or extend per spec.
3267    let base = format_datetime_ns(nanos);
3268    if fmt.date_only {
3269        return base[..10].to_owned();
3270    }
3271    if fmt.subsec_digits == 0 {
3272        return base;
3273    }
3274    let subsec = (nanos.rem_euclid(1_000_000_000)) as u32;
3275    let frac = subsec / 10u32.pow(9 - u32::from(fmt.subsec_digits));
3276    format!(
3277        "{base}.{frac:0>width$}",
3278        width = usize::from(fmt.subsec_digits)
3279    )
3280}
3281
3282fn scalar_to_csv_with_na(scalar: &Scalar, na_rep: &str) -> String {
3283    match scalar {
3284        Scalar::Null(_) => na_rep.to_owned(),
3285        Scalar::Float64(v) if v.is_nan() => na_rep.to_owned(),
3286        // pandas to_csv renders floats via Python str(float): whole numbers keep
3287        // ".0" ("1.0", not "1") and extreme magnitudes use signed two-digit
3288        // scientific notation. The table writers (latex/markdown/html) use the
3289        // separate scalar_to_table_with_na path and keep their own formatting.
3290        Scalar::Float64(v) => format_pandas_float(*v),
3291        Scalar::Timedelta64(v) if *v == Timedelta::NAT => na_rep.to_owned(),
3292        other => scalar_to_csv(other),
3293    }
3294}
3295
3296/// CSV cell formatter that applies a column's datetime spec when present;
3297/// otherwise identical to `scalar_to_csv_with_na`.
3298fn scalar_to_csv_cell(
3299    scalar: &Scalar,
3300    na_rep: &str,
3301    dt_format: Option<DatetimeCsvFormat>,
3302) -> String {
3303    if let (Scalar::Datetime64(ns), Some(fmt)) = (scalar, dt_format) {
3304        if *ns == Timestamp::NAT {
3305            return na_rep.to_owned();
3306        }
3307        return format_datetime_csv(*ns, fmt);
3308    }
3309    scalar_to_csv_with_na(scalar, na_rep)
3310}
3311
3312fn scalar_to_table_with_na(scalar: &Scalar, na_rep: &str) -> String {
3313    match scalar {
3314        Scalar::Null(_) => na_rep.to_owned(),
3315        Scalar::Float64(v) if v.is_nan() => na_rep.to_owned(),
3316        Scalar::Timedelta64(v) if *v == Timedelta::NAT => na_rep.to_owned(),
3317        other => scalar_to_csv(other),
3318    }
3319}
3320
3321/// LaTeX cell formatter. pandas `to_latex` renders floats with its default
3322/// `float_format` of six decimal places (`1.0` -> "1.000000", `0.1234567` ->
3323/// "0.123457"), unlike the str(float) form used by to_csv/to_html. Integers and
3324/// other scalars keep the shared table formatting. Verified vs pandas 2.2.3.
3325fn scalar_to_latex_cell(scalar: &Scalar, na_rep: &str) -> String {
3326    match scalar {
3327        Scalar::Float64(v) if v.is_nan() => na_rep.to_owned(),
3328        // Rust `{:.6}` matches Python `%.6f` (round-half-to-even) and renders
3329        // infinities as "inf"/"-inf", exactly as pandas to_latex does.
3330        Scalar::Float64(v) => format!("{v:.6}"),
3331        other => scalar_to_table_with_na(other, na_rep),
3332    }
3333}
3334
3335fn push_markdown_row(out: &mut String, cells: &[String]) {
3336    out.push('|');
3337    for cell in cells {
3338        out.push(' ');
3339        out.push_str(&escape_markdown_table_cell(cell));
3340        out.push_str(" |");
3341    }
3342    out.push('\n');
3343}
3344
/// Escapes text for use inside a Markdown table cell.
///
/// Backslashes and pipes are backslash-escaped; line feeds and carriage
/// returns are each replaced with a single space so the row stays on one line.
fn escape_markdown_table_cell(value: &str) -> String {
    value
        .chars()
        .fold(String::with_capacity(value.len()), |mut acc, ch| {
            match ch {
                '\\' => acc.push_str("\\\\"),
                '|' => acc.push_str("\\|"),
                '\n' | '\r' => acc.push(' '),
                other => acc.push(other),
            }
            acc
        })
}
3357
3358fn push_latex_row(out: &mut String, cells: &[String], escape: bool) {
3359    for (idx, cell) in cells.iter().enumerate() {
3360        if idx > 0 {
3361            out.push_str(" & ");
3362        }
3363        if escape {
3364            out.push_str(&escape_latex_table_cell(cell));
3365        } else {
3366            out.push_str(cell);
3367        }
3368    }
3369    out.push_str(" \\\\\n");
3370}
3371
/// Escapes LaTeX special characters in a table cell.
///
/// `& % $ # _ { }` get a leading backslash; `~`, `^` and `\` are replaced by
/// their `\text…{}` macros; line feeds and carriage returns each become one
/// space. Every other character is copied through unchanged.
fn escape_latex_table_cell(value: &str) -> String {
    let mut out = String::with_capacity(value.len());
    for ch in value.chars() {
        let replacement = match ch {
            '&' => "\\&",
            '%' => "\\%",
            '$' => "\\$",
            '#' => "\\#",
            '_' => "\\_",
            '{' => "\\{",
            '}' => "\\}",
            '~' => "\\textasciitilde{}",
            '^' => "\\textasciicircum{}",
            '\\' => "\\textbackslash{}",
            '\n' | '\r' => " ",
            _ => {
                out.push(ch);
                continue;
            }
        };
        out.push_str(replacement);
    }
    out
}
3392
/// Returns `true` when `s` is exactly one of the default NA markers recognized
/// by pandas `read_csv`.
///
/// The comparison is exact (case- and whitespace-sensitive); the empty string
/// counts as NA.
/// See: <https://pandas.pydata.org/docs/reference/api/pandas.read_csv.html>
fn is_pandas_default_na(s: &str) -> bool {
    const DEFAULT_NA_MARKERS: &[&str] = &[
        "",
        "#N/A",
        "#N/A N/A",
        "#NA",
        "-1.#IND",
        "-1.#QNAN",
        "-NaN",
        "-nan",
        "1.#IND",
        "1.#QNAN",
        "<NA>",
        "N/A",
        "NA",
        "NULL",
        "NaN",
        "None",
        "n/a",
        "nan",
        "null",
    ];

    DEFAULT_NA_MARKERS.contains(&s)
}
3418
3419fn parse_scalar(field: &str) -> Scalar {
3420    // pandas read_csv tolerates surrounding whitespace ONLY for numeric
3421    // inference. NA markers and booleans must match the field EXACTLY (a
3422    // padded " NA " or "true " is NOT null/bool), and plain strings keep their
3423    // original whitespace. Verified vs live pandas 2.2.3: " abc " stays
3424    // " abc "; "true " / " NA " stay strings; but " 1 " parses to Int64(1).
3425    if is_pandas_default_na(field) {
3426        return Scalar::Null(NullKind::Null);
3427    }
3428
3429    let trimmed = field.trim();
3430    if let Ok(value) = trimmed.parse::<i64>() {
3431        return Scalar::Int64(value);
3432    }
3433    if let Ok(value) = trimmed.parse::<f64>() {
3434        return Scalar::Float64(value);
3435    }
3436    if field.eq_ignore_ascii_case("true") {
3437        return Scalar::Bool(true);
3438    }
3439    if field.eq_ignore_ascii_case("false") {
3440        return Scalar::Bool(false);
3441    }
3442
3443    Scalar::Utf8(field.to_owned())
3444}
3445
/// Renders an f64 the way pandas `to_csv` does (Python `str(float)`).
///
/// Whole numbers keep a trailing ".0" ("1.0", not Rust Display's "1"),
/// decimals use the shortest round-trip form, and very large / small
/// magnitudes use signed, at-least-two-digit scientific notation ("1e+16" /
/// "1e-05"). Verified vs live pandas 2.2.3. NaN must be handled by the caller
/// (it becomes na_rep).
fn format_pandas_float(v: f64) -> String {
    // Rust's Debug output is already the shortest round-trip text, keeps ".0"
    // on whole numbers and switches to scientific notation at Python's repr
    // boundaries. Only the exponent spelling differs (Rust "1e16"/"1e-5" vs
    // Python "1e+16"/"1e-05"), so that is the only part rewritten here.
    let debug = format!("{v:?}");
    let Some(split_at) = debug.find('e') else {
        return debug;
    };

    let mantissa = &debug[..split_at];
    let exponent = &debug[split_at + 1..];
    let (sign, magnitude) = if let Some(rest) = exponent.strip_prefix('-') {
        ('-', rest)
    } else {
        ('+', exponent.strip_prefix('+').unwrap_or(exponent))
    };

    format!("{mantissa}e{sign}{magnitude:0>2}")
}
3468
3469fn scalar_to_csv(scalar: &Scalar) -> String {
3470    match scalar {
3471        Scalar::Null(_) => String::new(),
3472        // pandas to_csv writes capitalized True/False (matches fp-frame::to_csv).
3473        Scalar::Bool(v) => if *v { "True" } else { "False" }.to_string(),
3474        Scalar::Int64(v) => v.to_string(),
3475        Scalar::Float64(v) => {
3476            if v.is_nan() {
3477                String::new()
3478            } else {
3479                v.to_string()
3480            }
3481        }
3482        Scalar::Utf8(v) => v.clone(),
3483        Scalar::Timedelta64(v) => {
3484            if *v == Timedelta::NAT {
3485                String::new()
3486            } else {
3487                Timedelta::format(*v)
3488            }
3489        }
3490        Scalar::Datetime64(v) => {
3491            if *v == Timestamp::NAT {
3492                String::new()
3493            } else {
3494                format_datetime_ns(*v)
3495            }
3496        }
3497        Scalar::Period(v) => {
3498            if *v == i64::MIN {
3499                String::new()
3500            } else {
3501                format!("Period[{v}]")
3502            }
3503        }
3504        Scalar::Interval(iv) => format!("{iv}"),
3505    }
3506}
3507
3508/// Parse a field with NA value handling respecting pandas options.
3509///
3510/// - `na_filter`: If false, skip NA detection entirely for performance
3511/// - `keep_default_na`: If true, use pandas default NA values
3512/// - `na_set` / `true_set` / `false_set`: Pre-built HashSets for O(1)
3513///   membership lookup. Per br-frankenpandas-b67a3 (sister to br-fcf5d),
3514///   these were previously `&[String]` slices scanned linearly per cell;
3515///   now built once at the parent CSV reader and passed in.
3516#[allow(clippy::too_many_arguments)]
3517fn parse_scalar_with_options(
3518    field: &str,
3519    na_filter: bool,
3520    keep_default_na: bool,
3521    na_set: &HashSet<&str>,
3522    true_set: &HashSet<&str>,
3523    false_set: &HashSet<&str>,
3524    decimal: u8,
3525    thousands: Option<u8>,
3526) -> Scalar {
3527    // pandas only tolerates surrounding whitespace for NUMERIC inference; NA
3528    // markers, booleans, and plain strings match/keep the field EXACTLY (a
3529    // padded " NA " or "true " stays a string). See parse_scalar.
3530    let trimmed = field.trim();
3531
3532    // Check NA values only if na_filter is enabled
3533    if na_filter {
3534        let is_default_na = keep_default_na && is_pandas_default_na(field);
3535        let is_custom_na = na_set.contains(field);
3536        if is_default_na || is_custom_na {
3537            return Scalar::Null(NullKind::Null);
3538        }
3539    }
3540
3541    // `thousands` is silently ignored if it equals the decimal separator,
3542    // matching pandas semantics.
3543    let thousands_effective = thousands.filter(|t| *t != decimal);
3544    let numeric_candidate: Cow<'_, str> = if let Some(t) = thousands_effective {
3545        let ch = char::from(t);
3546        if trimmed.contains(ch) {
3547            Cow::Owned(trimmed.replace(ch, ""))
3548        } else {
3549            Cow::Borrowed(trimmed)
3550        }
3551    } else {
3552        Cow::Borrowed(trimmed)
3553    };
3554
3555    if let Ok(value) = numeric_candidate.as_ref().parse::<i64>() {
3556        return Scalar::Int64(value);
3557    }
3558
3559    let decimal_ch = char::from(decimal);
3560    let float_candidate: Cow<'_, str> = if decimal == b'.' {
3561        Cow::Borrowed(numeric_candidate.as_ref())
3562    } else if numeric_candidate.contains(decimal_ch) {
3563        Cow::Owned(numeric_candidate.replace(decimal_ch, "."))
3564    } else {
3565        Cow::Borrowed(numeric_candidate.as_ref())
3566    };
3567    if let Ok(value) = float_candidate.as_ref().parse::<f64>() {
3568        return Scalar::Float64(value);
3569    }
3570
3571    if true_set.contains(field) {
3572        return Scalar::Bool(true);
3573    }
3574    if false_set.contains(field) {
3575        return Scalar::Bool(false);
3576    }
3577
3578    if field.eq_ignore_ascii_case("true") {
3579        return Scalar::Bool(true);
3580    }
3581    if field.eq_ignore_ascii_case("false") {
3582        return Scalar::Bool(false);
3583    }
3584    Scalar::Utf8(field.to_owned())
3585}
3586
3587fn reject_duplicate_headers(headers: &[String]) -> Result<(), IoError> {
3588    let mut used = BTreeSet::new();
3589    for name in headers {
3590        if !used.insert(name.clone()) {
3591            return Err(IoError::DuplicateColumnName(name.clone()));
3592        }
3593    }
3594    Ok(())
3595}
3596
3597fn validate_usecols(headers: &[String], usecols: &[String]) -> Result<(), IoError> {
3598    let header_set: std::collections::BTreeSet<&String> = headers.iter().collect();
3599    let mut missing = Vec::new();
3600    for name in usecols {
3601        if !header_set.contains(name) {
3602            missing.push(name.clone());
3603        }
3604    }
3605    if missing.is_empty() {
3606        Ok(())
3607    } else {
3608        Err(IoError::MissingUsecols(missing))
3609    }
3610}
3611
3612fn validate_parse_dates(headers: &[String], parse_dates: &[String]) -> Result<(), IoError> {
3613    let header_set: std::collections::BTreeSet<&String> = headers.iter().collect();
3614    let mut missing = Vec::new();
3615    for name in parse_dates {
3616        if !header_set.contains(name) {
3617            missing.push(name.clone());
3618        }
3619    }
3620    if missing.is_empty() {
3621        Ok(())
3622    } else {
3623        Err(IoError::MissingParseDateColumns(missing))
3624    }
3625}
3626
3627fn validate_parse_date_combinations(
3628    headers: &[String],
3629    parse_date_combinations: &[Vec<String>],
3630) -> Result<(), IoError> {
3631    let header_set: std::collections::BTreeSet<&String> = headers.iter().collect();
3632    let mut missing = BTreeSet::new();
3633    for combo in parse_date_combinations {
3634        for name in combo {
3635            if !header_set.contains(name) {
3636                missing.insert(name.clone());
3637            }
3638        }
3639    }
3640    if missing.is_empty() {
3641        Ok(())
3642    } else {
3643        Err(IoError::MissingParseDateColumns(
3644            missing.into_iter().collect(),
3645        ))
3646    }
3647}
3648
3649fn apply_parse_dates(
3650    headers: &[String],
3651    columns: &mut [Vec<Scalar>],
3652    parse_dates: &[String],
3653) -> Result<(), IoError> {
3654    if parse_dates.is_empty() {
3655        return Ok(());
3656    }
3657
3658    validate_parse_dates(headers, parse_dates)?;
3659
3660    for column_name in parse_dates {
3661        let Some(column_idx) = headers.iter().position(|header| header == column_name) else {
3662            continue;
3663        };
3664
3665        let index_labels = (0..columns[column_idx].len() as i64)
3666            .map(IndexLabel::Int64)
3667            .collect::<Vec<_>>();
3668        let series = Series::from_values(
3669            column_name.clone(),
3670            index_labels,
3671            columns[column_idx].clone(),
3672        )?;
3673        if let Some(parsed) = parse_csv_datetime_column(&series)? {
3674            columns[column_idx] = parsed.values().to_vec();
3675        }
3676    }
3677
3678    Ok(())
3679}
3680
/// Parses number-like SQL text into a finite `f64`.
///
/// Surrounding whitespace is trimmed, every comma is dropped, and a `$` is
/// dropped when it appears before any digits (optionally after a leading
/// sign). Returns `None` for blank input, for text that reduces to just a
/// sign or a lone `.`, for anything `f64` parsing rejects, and for non-finite
/// results.
fn parse_sql_float_text(text: &str) -> Option<f64> {
    let body = text.trim();
    if body.is_empty() {
        return None;
    }

    let mut cleaned = String::with_capacity(body.len());
    for ch in body.chars() {
        // A currency sign is only ignorable while nothing but a sign precedes it.
        let at_leading_position = matches!(cleaned.as_str(), "" | "+" | "-");
        let skip = ch == ',' || (ch == '$' && at_leading_position);
        if !skip {
            cleaned.push(ch);
        }
    }

    if ["", "+", "-", "."].contains(&cleaned.as_str()) {
        return None;
    }

    cleaned
        .parse::<f64>()
        .ok()
        .filter(|number| number.is_finite())
}
3703
3704fn apply_sql_coerce_float(columns: &mut [Vec<Scalar>]) {
3705    for column in columns {
3706        let mut saw_text_float = false;
3707        let mut parsed_values = Vec::with_capacity(column.len());
3708
3709        for value in column.iter() {
3710            match value {
3711                Scalar::Utf8(text) => {
3712                    let Some(parsed) = parse_sql_float_text(text) else {
3713                        saw_text_float = false;
3714                        parsed_values.clear();
3715                        break;
3716                    };
3717                    saw_text_float = true;
3718                    parsed_values.push(Some(parsed));
3719                }
3720                Scalar::Null(_) | Scalar::Int64(_) | Scalar::Float64(_) => {
3721                    parsed_values.push(None);
3722                }
3723                Scalar::Bool(_)
3724                | Scalar::Timedelta64(_)
3725                | Scalar::Datetime64(_)
3726                | Scalar::Period(_)
3727                | Scalar::Interval(_) => {
3728                    saw_text_float = false;
3729                    parsed_values.clear();
3730                    break;
3731                }
3732            }
3733        }
3734
3735        if !saw_text_float {
3736            continue;
3737        }
3738
3739        for (value, parsed) in column.iter_mut().zip(parsed_values) {
3740            if let Some(parsed) = parsed {
3741                *value = Scalar::Float64(parsed);
3742            }
3743        }
3744    }
3745}
3746
3747fn combine_parse_date_values(column_group: &[Vec<Scalar>]) -> Vec<Scalar> {
3748    let len = column_group.first().map_or(0, Vec::len);
3749    let mut combined = Vec::with_capacity(len);
3750
3751    for row in 0..len {
3752        if column_group
3753            .iter()
3754            .any(|column| matches!(column[row], Scalar::Null(_)))
3755        {
3756            combined.push(Scalar::Null(NullKind::NaT));
3757            continue;
3758        }
3759
3760        let joined = column_group
3761            .iter()
3762            .map(|column| match &column[row] {
3763                Scalar::Utf8(value) => value.clone(),
3764                other => other.to_string(),
3765            })
3766            .collect::<Vec<_>>()
3767            .join(" ");
3768        combined.push(Scalar::Utf8(joined));
3769    }
3770
3771    combined
3772}
3773
3774fn apply_one_parse_date_combination(
3775    headers: &mut Vec<String>,
3776    columns: &mut Vec<Vec<Scalar>>,
3777    combined_name: String,
3778    sources: &[String],
3779) -> Result<(), IoError> {
3780    let mut positions = sources
3781        .iter()
3782        .map(|name| {
3783            headers
3784                .iter()
3785                .position(|header| header == name)
3786                .ok_or_else(|| IoError::MissingParseDateColumns(vec![name.clone()]))
3787        })
3788        .collect::<Result<Vec<_>, _>>()?;
3789    positions.sort_unstable();
3790
3791    let index_labels = (0..columns[positions[0]].len() as i64)
3792        .map(IndexLabel::Int64)
3793        .collect::<Vec<_>>();
3794    let combined_values = combine_parse_date_values(
3795        &positions
3796            .iter()
3797            .map(|&idx| columns[idx].clone())
3798            .collect::<Vec<_>>(),
3799    );
3800    let combined_series =
3801        Series::from_values(combined_name.clone(), index_labels, combined_values)?;
3802    let parsed = parse_csv_datetime_column(&combined_series)?.unwrap_or(combined_series);
3803
3804    for idx in positions.iter().rev() {
3805        headers.remove(*idx);
3806        columns.remove(*idx);
3807    }
3808    headers.insert(positions[0], combined_name);
3809    columns.insert(positions[0], parsed.values().to_vec());
3810    Ok(())
3811}
3812
3813fn parse_csv_datetime_column(series: &Series) -> Result<Option<Series>, IoError> {
3814    // pandas pd.read_csv(parse_dates=[col]) parses each value on its own —
3815    // a column with mixed naive ("2024-01-15 10:30:00") and aware
3816    // ("2024-01-15T10:30:00Z") entries normalizes each value
3817    // independently. The ToDatetimeOptions default `infer_mixed_timezone:
3818    // true` locks the column to the FIRST inferred pattern and rejects
3819    // any value that doesn't match it, which causes parse_failed=true and
3820    // leaves the column as raw strings even though every individual value
3821    // is parseable. Set it explicitly to false so each value goes through
3822    // parse_datetime_string, which already handles both naive and aware.
3823    let parsed = to_datetime_with_options(
3824        series,
3825        ToDatetimeOptions {
3826            infer_mixed_timezone: false,
3827            ..ToDatetimeOptions::default()
3828        },
3829    )?;
3830    let parse_failed = series
3831        .values()
3832        .iter()
3833        .zip(parsed.values())
3834        .any(|(original, parsed)| !original.is_missing() && parsed.is_missing());
3835
3836    if parse_failed {
3837        Ok(None)
3838    } else {
3839        Ok(Some(parsed))
3840    }
3841}
3842
3843fn pandas_csv_numeric_column_requires_float(values: &[Scalar]) -> bool {
3844    // DISC-011: Nullable extension Int64 dtype parity.
3845    // Previously: Int64 columns with nulls promoted to Float64 for CSV output.
3846    // Now: Int64 columns preserve integer encoding; only promote when the
3847    // column actually contains Float64 values (mixed Int64/Float64 → Float64).
3848    let mut saw_int = false;
3849    let mut saw_float = false;
3850
3851    for value in values {
3852        match value {
3853            Scalar::Int64(_) => saw_int = true,
3854            Scalar::Float64(_) => saw_float = true,
3855            Scalar::Null(_) => {}
3856            Scalar::Bool(_)
3857            | Scalar::Utf8(_)
3858            | Scalar::Timedelta64(_)
3859            | Scalar::Datetime64(_)
3860            | Scalar::Period(_)
3861            | Scalar::Interval(_) => {
3862                return false;
3863            }
3864        }
3865    }
3866
3867    saw_int && saw_float
3868}
3869
3870fn apply_pandas_csv_numeric_promotions(columns: &mut [Vec<Scalar>]) {
3871    for column in columns {
3872        if !pandas_csv_numeric_column_requires_float(column) {
3873            continue;
3874        }
3875
3876        for value in column {
3877            if let Scalar::Int64(v) = value {
3878                *value = Scalar::Float64(*v as f64);
3879            }
3880        }
3881    }
3882}
3883
3884fn apply_parse_date_combinations(
3885    headers: &mut Vec<String>,
3886    columns: &mut Vec<Vec<Scalar>>,
3887    parse_date_combinations: &[Vec<String>],
3888) -> Result<(), IoError> {
3889    if parse_date_combinations.is_empty() {
3890        return Ok(());
3891    }
3892
3893    validate_parse_date_combinations(headers, parse_date_combinations)?;
3894
3895    for combination in parse_date_combinations {
3896        if combination.is_empty() {
3897            continue;
3898        }
3899        let combined_name = combination.join("_");
3900        apply_one_parse_date_combination(headers, columns, combined_name, combination)?;
3901    }
3902
3903    Ok(())
3904}
3905
3906fn apply_parse_date_combinations_named(
3907    headers: &mut Vec<String>,
3908    columns: &mut Vec<Vec<Scalar>>,
3909    parse_date_combinations_named: &[(String, Vec<String>)],
3910) -> Result<(), IoError> {
3911    if parse_date_combinations_named.is_empty() {
3912        return Ok(());
3913    }
3914
3915    let mut assigned_names: std::collections::HashSet<String> = std::collections::HashSet::new();
3916    for (new_name, _) in parse_date_combinations_named {
3917        if !assigned_names.insert(new_name.clone()) {
3918            return Err(IoError::DuplicateColumnName(new_name.clone()));
3919        }
3920    }
3921
3922    let combos_only: Vec<Vec<String>> = parse_date_combinations_named
3923        .iter()
3924        .map(|(_, sources)| sources.clone())
3925        .collect();
3926    validate_parse_date_combinations(headers, &combos_only)?;
3927
3928    for (new_name, sources) in parse_date_combinations_named {
3929        if sources.is_empty() {
3930            continue;
3931        }
3932        apply_one_parse_date_combination(headers, columns, new_name.clone(), sources)?;
3933    }
3934
3935    Ok(())
3936}
3937
3938fn append_csv_record(
3939    columns: &mut [Vec<Scalar>],
3940    raw_columns: &mut [Vec<String>],
3941    record: &StringRecord,
3942    options: &CsvReadOptions,
3943    na_set: &HashSet<&str>,
3944    true_set: &HashSet<&str>,
3945    false_set: &HashSet<&str>,
3946) {
3947    for (idx, col) in columns.iter_mut().enumerate() {
3948        let field = record.get(idx).unwrap_or_default();
3949        col.push(parse_scalar_with_options(
3950            field,
3951            options.na_filter,
3952            options.keep_default_na,
3953            na_set,
3954            true_set,
3955            false_set,
3956            options.decimal,
3957            options.thousands,
3958        ));
3959        // Keep the verbatim field so an object-fallback column can preserve the
3960        // original literal like pandas (see build_csv_object_aware_column).
3961        raw_columns[idx].push(field.to_owned());
3962    }
3963}
3964
3965fn should_skip_bad_csv_record(
3966    record: &StringRecord,
3967    expected_fields: usize,
3968    on_bad_lines: CsvOnBadLines,
3969) -> bool {
3970    if record.len() <= expected_fields {
3971        return false;
3972    }
3973
3974    match on_bad_lines {
3975        CsvOnBadLines::Error => false,
3976        CsvOnBadLines::Warn => {
3977            eprintln!(
3978                "Skipping bad CSV line: expected {expected_fields} fields, found {}",
3979                record.len()
3980            );
3981            true
3982        }
3983        CsvOnBadLines::Skip => true,
3984    }
3985}
3986
3987// ── CSV with options ───────────────────────────────────────────────────
3988
3989pub fn read_csv_with_options(input: &str, options: &CsvReadOptions) -> Result<DataFrame, IoError> {
3990    if csv_read_options_match_default_fast_path(options) {
3991        return read_csv_str(input);
3992    }
3993
3994    if csv_read_options_match_no_na_numeric_fast_path(options) {
3995        if let Some(frame) = csv_parse_cache_lookup(CsvParseCacheMode::NoNaNumeric, input) {
3996            return Ok(frame);
3997        }
3998
3999        if let Some(frame) = try_read_csv_with_options_no_na_numeric_fast_path(input)? {
4000            csv_parse_cache_store(CsvParseCacheMode::NoNaNumeric, input, &frame);
4001            return Ok(frame);
4002        }
4003    }
4004
4005    let mut builder = ReaderBuilder::new();
4006    builder
4007        .has_headers(false)
4008        .delimiter(options.delimiter)
4009        .quote(options.quotechar)
4010        .double_quote(options.doublequote)
4011        .escape(options.escapechar);
4012    if options.on_bad_lines != CsvOnBadLines::Error {
4013        builder.flexible(true);
4014    }
4015    if let Some(c) = options.comment {
4016        builder.comment(Some(c));
4017    }
4018    if let Some(term) = options.lineterminator {
4019        builder.terminator(csv::Terminator::Any(term));
4020    }
4021    let mut reader = builder.from_reader(input.as_bytes());
4022
4023    let max_rows = options.nrows.unwrap_or(usize::MAX);
4024    let skip = options.skiprows;
4025
4026    let mut records = reader.records();
4027    for _ in 0..skip {
4028        if records.next().transpose()?.is_none() {
4029            return Err(IoError::MissingHeaders);
4030        }
4031    }
4032
4033    // Per br-frankenpandas-b67a3: pre-build NA / true / false sets once,
4034    // then thread through the per-record loop. Was Vec::iter().any()
4035    // per cell.
4036    let na_set: HashSet<&str> = options.na_values.iter().map(String::as_str).collect();
4037    let true_set: HashSet<&str> = options.true_values.iter().map(String::as_str).collect();
4038    let false_set: HashSet<&str> = options.false_values.iter().map(String::as_str).collect();
4039
4040    let mut row_count: i64 = 0;
4041    // raw_columns shadows columns with each cell's verbatim text so an
4042    // object-fallback column can preserve original literals (see the final
4043    // build step and build_csv_object_aware_column).
4044    let (headers, mut columns, mut raw_columns) = if options.has_headers {
4045        let headers_record = records.next().transpose()?.ok_or(IoError::MissingHeaders)?;
4046        if headers_record.is_empty() {
4047            return Err(IoError::MissingHeaders);
4048        }
4049
4050        let header_count = headers_record.len();
4051        let row_hint = input.len() / (header_count * 8).max(1);
4052        let columns: Vec<Vec<Scalar>> = (0..header_count)
4053            .map(|_| Vec::with_capacity(row_hint))
4054            .collect();
4055        let raw_columns: Vec<Vec<String>> = (0..header_count)
4056            .map(|_| Vec::with_capacity(row_hint))
4057            .collect();
4058
4059        (
4060            headers_record
4061                .iter()
4062                .map(ToOwned::to_owned)
4063                .collect::<Vec<_>>(),
4064            columns,
4065            raw_columns,
4066        )
4067    } else {
4068        let first_record = records.next().transpose()?.ok_or(IoError::MissingHeaders)?;
4069        if first_record.is_empty() {
4070            return Err(IoError::MissingHeaders);
4071        }
4072
4073        let header_count = first_record.len();
4074        let row_hint = input.len() / (header_count * 8).max(1);
4075        let mut columns: Vec<Vec<Scalar>> = (0..header_count)
4076            .map(|_| Vec::with_capacity(row_hint))
4077            .collect();
4078        let mut raw_columns: Vec<Vec<String>> = (0..header_count)
4079            .map(|_| Vec::with_capacity(row_hint))
4080            .collect();
4081
4082        if (row_count as usize) < max_rows {
4083            append_csv_record(
4084                &mut columns,
4085                &mut raw_columns,
4086                &first_record,
4087                options,
4088                &na_set,
4089                &true_set,
4090                &false_set,
4091            );
4092            row_count += 1;
4093        }
4094
4095        (
4096            (0..header_count)
4097                .map(|idx| format!("column_{idx}"))
4098                .collect(),
4099            columns,
4100            raw_columns,
4101        )
4102    };
4103
4104    for row in records {
4105        if (row_count as usize) >= max_rows {
4106            break;
4107        }
4108        let record = row?;
4109        if should_skip_bad_csv_record(&record, columns.len(), options.on_bad_lines) {
4110            continue;
4111        }
4112        append_csv_record(
4113            &mut columns,
4114            &mut raw_columns,
4115            &record,
4116            options,
4117            &na_set,
4118            &true_set,
4119            &false_set,
4120        );
4121        row_count += 1;
4122    }
4123
4124    // Drop the last `skipfooter` data rows. Matches pandas semantics:
4125    // footer rows are dropped *after* header parsing and nrows limit.
4126    if options.skipfooter > 0 && (row_count as usize) > 0 {
4127        let drop = options.skipfooter.min(row_count as usize);
4128        for col in columns.iter_mut() {
4129            let new_len = col.len().saturating_sub(drop);
4130            col.truncate(new_len);
4131        }
4132        for col in raw_columns.iter_mut() {
4133            let new_len = col.len().saturating_sub(drop);
4134            col.truncate(new_len);
4135        }
4136        row_count -= drop as i64;
4137    }
4138    reject_duplicate_headers(&headers)?;
4139    if let Some(ref usecols) = options.usecols {
4140        validate_usecols(&headers, usecols)?;
4141    }
4142
4143    // Apply usecols filter: keep only selected columns.
4144    let (mut headers, mut columns, raw_columns) = if let Some(ref usecols) = options.usecols {
4145        let mut fh = Vec::new();
4146        let mut fc = Vec::new();
4147        let mut fr = Vec::new();
4148        for ((h, c), r) in headers.into_iter().zip(columns).zip(raw_columns) {
4149            if usecols.contains(&h) {
4150                fh.push(h);
4151                fc.push(c);
4152                fr.push(r);
4153            }
4154        }
4155        (fh, fc, fr)
4156    } else {
4157        (headers, columns, raw_columns)
4158    };
4159
4160    if let Some(ref parse_date_combinations) = options.parse_date_combinations {
4161        apply_parse_date_combinations(&mut headers, &mut columns, parse_date_combinations)?;
4162    }
4163
4164    if let Some(ref named) = options.parse_date_combinations_named {
4165        apply_parse_date_combinations_named(&mut headers, &mut columns, named)?;
4166    }
4167
4168    if let Some(ref parse_dates) = options.parse_dates {
4169        apply_parse_dates(&headers, &mut columns, parse_dates)?;
4170    }
4171
4172    apply_pandas_csv_numeric_promotions(&mut columns);
4173
4174    // Apply dtype coercion if specified.
4175    if let Some(ref dtype_map) = options.dtype {
4176        for (i, name) in headers.iter().enumerate() {
4177            if let Some(&target_dt) = dtype_map.get(name) {
4178                let coerced = columns[i]
4179                    .iter()
4180                    .map(|v| fp_types::cast_scalar(v, target_dt))
4181                    .collect::<Result<Vec<_>, _>>()
4182                    .map_err(|err| IoError::Column(ColumnError::from(err)))?;
4183                columns[i] = coerced;
4184            }
4185        }
4186    }
4187
4188    let header_count = headers.len();
4189
4190    // Object-fallback columns should keep verbatim source text (pandas parity,
4191    // see build_csv_object_aware_column). Only safe when no parse-date transform
4192    // rewrote/added columns (which would desync raw_columns from columns); a
4193    // per-column dtype override is also excluded so explicit astype wins.
4194    let preserve_object_text = options.parse_dates.is_none()
4195        && options.parse_date_combinations.is_none()
4196        && options.parse_date_combinations_named.is_none();
4197    let dtype_forced = |name: &str| -> bool {
4198        options
4199            .dtype
4200            .as_ref()
4201            .is_some_and(|map| map.contains_key(name))
4202    };
4203
4204    // If index_col is set, extract that column as the index
4205    if let Some(ref idx_col_name) = options.index_col {
4206        let idx_pos = headers
4207            .iter()
4208            .position(|h| h == idx_col_name)
4209            .ok_or_else(|| IoError::MissingIndexColumn(idx_col_name.clone()))?;
4210
4211        let index_values = columns.remove(idx_pos);
4212        let index_labels: Vec<fp_index::IndexLabel> = index_values
4213            .into_iter()
4214            .map(|s| match s {
4215                Scalar::Int64(v) => fp_index::IndexLabel::Int64(v),
4216                Scalar::Utf8(v) => fp_index::IndexLabel::Utf8(v),
4217                Scalar::Float64(v) => fp_index::IndexLabel::Utf8(v.to_string()),
4218                Scalar::Bool(v) => {
4219                    fp_index::IndexLabel::Utf8(if v { "True" } else { "False" }.to_string())
4220                }
4221                // Typed null label (br-frankenpandas-8m6ay): pandas
4222                // read_csv(index_col=...) keeps a real nan index entry for an
4223                // empty cell. Kind-preserving bijection.
4224                Scalar::Null(kind) => fp_index::IndexLabel::Null(kind),
4225                Scalar::Timedelta64(v) => {
4226                    if v == Timedelta::NAT {
4227                        fp_index::IndexLabel::Utf8("<NaT>".to_owned())
4228                    } else {
4229                        fp_index::IndexLabel::Utf8(Timedelta::format(v))
4230                    }
4231                }
4232                Scalar::Datetime64(v) => {
4233                    if v == Timestamp::NAT {
4234                        fp_index::IndexLabel::Utf8("<NaT>".to_owned())
4235                    } else {
4236                        fp_index::IndexLabel::Utf8(format_datetime_ns(v))
4237                    }
4238                }
4239                Scalar::Period(v) => {
4240                    if v == i64::MIN {
4241                        fp_index::IndexLabel::Utf8("<NaT>".to_owned())
4242                    } else {
4243                        fp_index::IndexLabel::Utf8(format!("Period[{v}]"))
4244                    }
4245                }
4246                Scalar::Interval(iv) => fp_index::IndexLabel::Utf8(format!("{iv}")),
4247            })
4248            .collect();
4249        // Per br-frankenpandas-l0vbr: pandas pd.read_csv(index_col='col')
4250        // sets result.index.name = 'col'.
4251        let index = Index::new(index_labels).set_name(idx_col_name);
4252
4253        let mut out_columns = BTreeMap::new();
4254        let mut column_order = Vec::with_capacity(headers.len() - 1);
4255        let mut col_idx = 0;
4256        for (orig_idx, _) in headers.iter().enumerate() {
4257            if orig_idx == idx_pos {
4258                continue;
4259            }
4260            let name = headers.get(orig_idx).cloned().unwrap_or_default();
4261            let column = if preserve_object_text && !dtype_forced(&name) {
4262                build_csv_object_aware_column(columns[col_idx].clone(), &raw_columns[orig_idx])?
4263            } else {
4264                Column::from_values(columns[col_idx].clone())?
4265            };
4266            out_columns.insert(name.clone(), column);
4267            column_order.push(name);
4268            col_idx += 1;
4269        }
4270        Ok(DataFrame::new_with_column_order(
4271            index,
4272            out_columns,
4273            column_order,
4274        )?)
4275    } else {
4276        let mut out_columns = BTreeMap::new();
4277        let mut column_order = Vec::with_capacity(header_count);
4278        for (idx, values) in columns.into_iter().enumerate() {
4279            let name = headers.get(idx).cloned().unwrap_or_default();
4280            let column = if preserve_object_text && !dtype_forced(&name) {
4281                build_csv_object_aware_column(values, &raw_columns[idx])?
4282            } else {
4283                Column::from_values(values)?
4284            };
4285            out_columns.insert(name.clone(), column);
4286            column_order.push(name);
4287        }
4288        let index = csv_default_unit_range_index(row_count);
4289        Ok(DataFrame::new_with_column_order(
4290            index,
4291            out_columns,
4292            column_order,
4293        )?)
4294    }
4295}
4296
4297/// Read CSV and promote the named columns into a row index / row MultiIndex.
4298///
4299/// For multiple names this mirrors pandas `index_col=[...]`.
4300pub fn read_csv_with_index_cols(
4301    input: &str,
4302    options: &CsvReadOptions,
4303    index_cols: &[&str],
4304) -> Result<DataFrame, IoError> {
4305    let frame = read_csv_with_options(input, options)?;
4306    promote_frame_index_columns(&frame, index_cols)
4307}
4308
4309// ── File-based CSV ─────────────────────────────────────────────────────
4310
4311pub fn read_csv(path: &Path) -> Result<DataFrame, IoError> {
4312    read_csv_with_options_path(path, &CsvReadOptions::default())
4313}
4314
4315pub fn read_csv_with_options_path(
4316    path: &Path,
4317    options: &CsvReadOptions,
4318) -> Result<DataFrame, IoError> {
4319    let content = std::fs::read_to_string(path)?;
4320    read_csv_with_options(&content, options)
4321}
4322
4323pub fn read_csv_with_index_cols_path(
4324    path: &Path,
4325    options: &CsvReadOptions,
4326    index_cols: &[&str],
4327) -> Result<DataFrame, IoError> {
4328    let content = std::fs::read_to_string(path)?;
4329    read_csv_with_index_cols(&content, options, index_cols)
4330}
4331
4332pub fn write_csv(frame: &DataFrame, path: &Path) -> Result<(), IoError> {
4333    let content = write_csv_string(frame)?;
4334    std::fs::write(path, content)?;
4335    Ok(())
4336}
4337
4338// ── read_table (tab-separated thin wrapper) ────────────────────────────
4339
4340/// Parse a tab-separated string, matching `pd.read_table(io.StringIO(s))`.
4341///
4342/// Equivalent to [`read_csv_str`] with `delimiter=b'\t'`. Other defaults
4343/// match pandas `read_table`: `header='infer'`, default NA values, no
4344/// index column promotion. Use [`read_table_with_options`] for full
4345/// option control.
4346pub fn read_table_str(input: &str) -> Result<DataFrame, IoError> {
4347    let opts = CsvReadOptions {
4348        delimiter: b'\t',
4349        ..CsvReadOptions::default()
4350    };
4351    read_csv_with_options(input, &opts)
4352}
4353
4354/// Parse a tab-separated string with explicit options. The caller-supplied
4355/// `options.delimiter` is preserved when it differs from the comma default
4356/// to allow override; otherwise it is forced to `b'\t'` so that the
4357/// pandas `read_table` semantics survive `CsvReadOptions::default()`.
4358pub fn read_table_with_options(
4359    input: &str,
4360    options: &CsvReadOptions,
4361) -> Result<DataFrame, IoError> {
4362    let mut effective = options.clone();
4363    if effective.delimiter == b',' {
4364        effective.delimiter = b'\t';
4365    }
4366    read_csv_with_options(input, &effective)
4367}
4368
4369/// Read a tab-separated file from disk, matching `pd.read_table(path)`.
4370pub fn read_table(path: &Path) -> Result<DataFrame, IoError> {
4371    let opts = CsvReadOptions {
4372        delimiter: b'\t',
4373        ..CsvReadOptions::default()
4374    };
4375    read_csv_with_options_path(path, &opts)
4376}
4377
4378/// Read a tab-separated file from disk with explicit options. The
4379/// caller-supplied delimiter is honored when it has been overridden from
4380/// the comma default; otherwise it is forced to `b'\t'`.
4381pub fn read_table_with_options_path(
4382    path: &Path,
4383    options: &CsvReadOptions,
4384) -> Result<DataFrame, IoError> {
4385    let mut effective = options.clone();
4386    if effective.delimiter == b',' {
4387        effective.delimiter = b'\t';
4388    }
4389    read_csv_with_options_path(path, &effective)
4390}
4391
4392// ── read_fwf (fixed-width file reader) ─────────────────────────────────
4393
4394/// Read a fixed-width file from disk, matching `pd.read_fwf(path, ...)`.
4395///
4396/// See [`read_fwf_str`] for the option semantics. When neither explicit
4397/// `colspecs` nor `widths` are supplied, column ranges are inferred from
4398/// non-whitespace runs.
4399pub fn read_fwf(path: &Path, options: &FwfReadOptions) -> Result<DataFrame, IoError> {
4400    let content = std::fs::read_to_string(path)?;
4401    read_fwf_str(&content, options)
4402}
4403
4404// ── Deferred reader surfaces ───────────────────────────────────────────
4405//
4406// pandas exposes pd.read_clipboard / pd.read_gbq / pd.read_sas / pd.read_spss.
4407// Each is out of scope for FrankenPandas's local file-format charter:
4408//
4409//   * read_clipboard pulls from the OS clipboard (GUI-only, headless-hostile).
4410//   * read_gbq calls Google BigQuery (external service, GCP credentials).
4411//   * read_sas / read_spss are proprietary statistical-software formats with
4412//     no first-party Rust reader at parity (pandas calls into pyreadstat /
4413//     sas7bdat).
4414//
4415// Following the deferral precedent in fp-frame for plotting (see
4416// `plotting_deferred`), expose typed reject-closed entry points so callers
4417// can program against the surface and fall through to a clean error rather
4418// than a missing symbol.
4419
4420fn deferred_reader_error(method: &str, reason: &str) -> IoError {
4421    IoError::Deferred(format!(
4422        "{method}: in scope but deferred; {reason}. Use the pandas surface in the meantime."
4423    ))
4424}
4425
4426fn deferred_writer_error(method: &str, reason: &str) -> IoError {
4427    IoError::Deferred(format!(
4428        "{method}: in scope but deferred; {reason}. Use the pandas surface in the meantime."
4429    ))
4430}
4431
4432/// Reject-closed clipboard reader, matching `pd.read_clipboard()` shape.
4433pub fn read_clipboard() -> Result<DataFrame, IoError> {
4434    Err(deferred_reader_error(
4435        "read_clipboard",
4436        "OS clipboard access requires GUI bindings outside FrankenPandas's headless charter",
4437    ))
4438}
4439
4440/// Reject-closed BigQuery reader, matching `pd.read_gbq(query, project_id)`.
4441pub fn read_gbq(_query: &str, _project_id: Option<&str>) -> Result<DataFrame, IoError> {
4442    Err(deferred_reader_error(
4443        "read_gbq",
4444        "Google BigQuery integration is outside FrankenPandas's local file-format scope",
4445    ))
4446}
4447
4448/// Reject-closed SAS reader, matching `pd.read_sas(path)`.
4449pub fn read_sas(_path: &Path) -> Result<DataFrame, IoError> {
4450    Err(deferred_reader_error(
4451        "read_sas",
4452        "no first-party Rust SAS sas7bdat/xport reader exists at pandas-parity yet",
4453    ))
4454}
4455
4456/// Reject-closed SPSS reader, matching `pd.read_spss(path)`.
4457pub fn read_spss(_path: &Path) -> Result<DataFrame, IoError> {
4458    Err(deferred_reader_error(
4459        "read_spss",
4460        "no first-party Rust SPSS .sav reader exists at pandas-parity yet",
4461    ))
4462}
4463
4464// ── File-based Markdown / LaTeX ────────────────────────────────────────
4465
4466/// Write a DataFrame to a Markdown table file.
4467pub fn write_markdown(frame: &DataFrame, path: &Path) -> Result<(), IoError> {
4468    write_markdown_with_options(frame, path, &MarkdownWriteOptions::default())
4469}
4470
4471/// Write a DataFrame to a Markdown table file with explicit options.
4472pub fn write_markdown_with_options(
4473    frame: &DataFrame,
4474    path: &Path,
4475    options: &MarkdownWriteOptions,
4476) -> Result<(), IoError> {
4477    let content = write_markdown_string_with_options(frame, options)?;
4478    std::fs::write(path, content)?;
4479    Ok(())
4480}
4481
4482/// Write a DataFrame to a LaTeX tabular file.
4483pub fn write_latex(frame: &DataFrame, path: &Path) -> Result<(), IoError> {
4484    write_latex_with_options(frame, path, &LatexWriteOptions::default())
4485}
4486
4487/// Write a DataFrame to a LaTeX tabular file with explicit options.
4488pub fn write_latex_with_options(
4489    frame: &DataFrame,
4490    path: &Path,
4491    options: &LatexWriteOptions,
4492) -> Result<(), IoError> {
4493    let content = write_latex_string_with_options(frame, options)?;
4494    std::fs::write(path, content)?;
4495    Ok(())
4496}
4497
4498// ── File-based HTML ────────────────────────────────────────────────────
4499
4500pub fn read_html(path: &Path) -> Result<DataFrame, IoError> {
4501    read_html_with_options(path, &HtmlReadOptions::default())
4502}
4503
4504pub fn read_html_with_options(
4505    path: &Path,
4506    options: &HtmlReadOptions,
4507) -> Result<DataFrame, IoError> {
4508    let content = std::fs::read_to_string(path)?;
4509    read_html_str_with_options(&content, options)
4510}
4511
4512pub fn write_html(frame: &DataFrame, path: &Path) -> Result<(), IoError> {
4513    write_html_with_options(frame, path, &HtmlWriteOptions::default())
4514}
4515
4516pub fn write_html_with_options(
4517    frame: &DataFrame,
4518    path: &Path,
4519    options: &HtmlWriteOptions,
4520) -> Result<(), IoError> {
4521    let content = write_html_string_with_options(frame, options)?;
4522    std::fs::write(path, content)?;
4523    Ok(())
4524}
4525
4526// ── File-based XML ─────────────────────────────────────────────────────
4527
4528pub fn write_xml(frame: &DataFrame, path: &Path) -> Result<(), IoError> {
4529    write_xml_with_options(frame, path, &XmlWriteOptions::default())
4530}
4531
4532pub fn write_xml_with_options(
4533    frame: &DataFrame,
4534    path: &Path,
4535    options: &XmlWriteOptions,
4536) -> Result<(), IoError> {
4537    let content = write_xml_string_with_options(frame, options)?;
4538    std::fs::write(path, content)?;
4539    Ok(())
4540}
4541
4542// ── File-based XML readers ─────────────────────────────────────────────
4543
4544pub fn read_xml(path: &Path) -> Result<DataFrame, IoError> {
4545    read_xml_with_options(path, &XmlReadOptions::default())
4546}
4547
4548pub fn read_xml_with_options(path: &Path, options: &XmlReadOptions) -> Result<DataFrame, IoError> {
4549    let content = std::fs::read_to_string(path)?;
4550    read_xml_str_with_options(&content, options)
4551}
4552
4553// ── JSON IO ────────────────────────────────────────────────────────────
4554
4555fn json_value_to_scalar(val: &serde_json::Value) -> Scalar {
4556    match val {
4557        serde_json::Value::Null => Scalar::Null(NullKind::Null),
4558        serde_json::Value::Bool(b) => Scalar::Bool(*b),
4559        serde_json::Value::Number(n) => {
4560            if let Some(i) = n.as_i64() {
4561                Scalar::Int64(i)
4562            } else if let Some(f) = n.as_f64() {
4563                Scalar::Float64(f)
4564            } else {
4565                Scalar::Utf8(n.to_string())
4566            }
4567        }
4568        serde_json::Value::String(s) => Scalar::Utf8(s.clone()),
4569        other => Scalar::Utf8(other.to_string()),
4570    }
4571}
4572
4573fn parse_json_value_allowing_pandas_nan(input: &str) -> Result<serde_json::Value, IoError> {
4574    match serde_json::from_str(input) {
4575        Ok(value) => Ok(value),
4576        Err(original) => {
4577            let normalized = normalize_bare_json_nan_tokens(input);
4578            if normalized == input {
4579                return Err(original.into());
4580            }
4581            serde_json::from_str(&normalized).map_err(IoError::from)
4582        }
4583    }
4584}
4585
4586fn normalize_bare_json_nan_tokens(input: &str) -> String {
4587    let mut output = String::with_capacity(input.len());
4588    let mut index = 0;
4589    let mut in_string = false;
4590    let mut escaped = false;
4591
4592    while index < input.len() {
4593        let rest = &input[index..];
4594        let Some(ch) = rest.chars().next() else {
4595            break;
4596        };
4597
4598        if in_string {
4599            output.push(ch);
4600            index += ch.len_utf8();
4601            if escaped {
4602                escaped = false;
4603            } else if ch == '\\' {
4604                escaped = true;
4605            } else if ch == '"' {
4606                in_string = false;
4607            }
4608            continue;
4609        }
4610
4611        if ch == '"' {
4612            in_string = true;
4613            output.push(ch);
4614            index += ch.len_utf8();
4615            continue;
4616        }
4617
4618        if rest.starts_with("NaN")
4619            && is_json_value_start_boundary(input, index)
4620            && is_json_value_end_boundary(input, index + 3)
4621        {
4622            output.push_str("null");
4623            index += 3;
4624            continue;
4625        }
4626
4627        output.push(ch);
4628        index += ch.len_utf8();
4629    }
4630
4631    output
4632}
4633
/// Report whether byte offset `index` in `input` is a position where a JSON
/// value may begin.
///
/// Looking backwards past any whitespace, the position qualifies when there
/// is nothing before it or when the nearest preceding character is `:`, `[`
/// or `,`. `index` must lie on a character boundary, otherwise slicing
/// panics.
fn is_json_value_start_boundary(input: &str, index: usize) -> bool {
    let previous = input[..index].trim_end().chars().next_back();
    matches!(previous, None | Some(':' | '[' | ','))
}
4641
/// Report whether byte offset `index` in `input` is a position where a JSON
/// value may end.
///
/// Looking forwards past any whitespace, the position qualifies when nothing
/// follows or when the next character is `,`, `]` or `}`. `index` must lie
/// on a character boundary, otherwise slicing panics.
fn is_json_value_end_boundary(input: &str, index: usize) -> bool {
    let following = input[index..].trim_start().chars().next();
    matches!(following, None | Some(',' | ']' | '}'))
}
4648
4649fn column_from_json_values(values: Vec<Scalar>) -> Result<Column, IoError> {
4650    let saw_utf8 = values.iter().any(|value| matches!(value, Scalar::Utf8(_)));
4651    let saw_missing = values.iter().any(Scalar::is_missing);
4652    let saw_numeric_like = values.iter().any(|value| {
4653        matches!(
4654            value,
4655            Scalar::Int64(_) | Scalar::Float64(_) | Scalar::Bool(_)
4656        )
4657    });
4658
4659    if !saw_utf8 && saw_missing && (saw_numeric_like || values.iter().all(Scalar::is_missing)) {
4660        let promoted = values
4661            .into_iter()
4662            .map(|value| match value {
4663                Scalar::Int64(value) => Scalar::Float64(value as f64),
4664                Scalar::Bool(value) => Scalar::Float64(if value { 1.0 } else { 0.0 }),
4665                Scalar::Null(_) => Scalar::Null(NullKind::NaN),
4666                other => other,
4667            })
4668            .collect();
4669        return Column::new(DType::Float64, promoted).map_err(IoError::from);
4670    }
4671
4672    Column::from_values(values).map_err(IoError::from)
4673}
4674
4675fn scalar_to_json(scalar: &Scalar) -> serde_json::Value {
4676    match scalar {
4677        Scalar::Null(_) => serde_json::Value::Null,
4678        Scalar::Bool(b) => serde_json::Value::Bool(*b),
4679        Scalar::Int64(i) => serde_json::json!(*i),
4680        Scalar::Float64(f) => {
4681            if f.is_nan() || f.is_infinite() {
4682                serde_json::Value::Null
4683            } else {
4684                serde_json::json!(*f)
4685            }
4686        }
4687        Scalar::Utf8(s) => serde_json::Value::String(s.clone()),
4688        // pandas to_json (default date_format='epoch', date_unit='ms') serializes
4689        // datetime64 and timedelta64 as epoch-MILLISECOND integers, not strings.
4690        // (br-frankenpandas-lb0iu)
4691        Scalar::Timedelta64(v) => {
4692            if *v == Timedelta::NAT {
4693                serde_json::Value::Null
4694            } else {
4695                serde_json::json!(*v / 1_000_000)
4696            }
4697        }
4698        Scalar::Datetime64(v) => {
4699            if *v == Timestamp::NAT {
4700                serde_json::Value::Null
4701            } else {
4702                serde_json::json!(*v / 1_000_000)
4703            }
4704        }
4705        Scalar::Period(v) => {
4706            if *v == i64::MIN {
4707                serde_json::Value::Null
4708            } else {
4709                serde_json::Value::String(format!("Period[{v}]"))
4710            }
4711        }
4712        Scalar::Interval(iv) => serde_json::Value::String(format!("{iv}")),
4713    }
4714}
4715
/// Decide whether a column's `Int64` values should be written to JSON as
/// floats.
///
/// Always returns `false`; the `_values` argument is not inspected. The
/// function is kept as the single switch point for this policy.
fn column_promotes_int_json_values_to_float(_values: &[Scalar]) -> bool {
    // DISC-011: Nullable extension Int64 dtype parity.
    // Pandas (since v0.24) preserves Int64 via a separate validity mask when
    // null values are present. We now match: Int64 values serialize as integers,
    // Null values serialize as null, no Float64 promotion.
    false
}
4723
4724fn scalar_to_json_with_column_promotion(
4725    scalar: &Scalar,
4726    promote_int_to_float: bool,
4727) -> serde_json::Value {
4728    if promote_int_to_float && let Scalar::Int64(v) = scalar {
4729        return serde_json::json!(*v as f64);
4730    }
4731    scalar_to_json(scalar)
4732}
4733
4734fn json_value_to_index_label(value: &serde_json::Value) -> IndexLabel {
4735    match value {
4736        serde_json::Value::Number(n) => n
4737            .as_i64()
4738            .map(IndexLabel::Int64)
4739            .unwrap_or_else(|| IndexLabel::Utf8(n.to_string())),
4740        serde_json::Value::String(s) => IndexLabel::Utf8(s.clone()),
4741        serde_json::Value::Bool(b) => IndexLabel::Utf8(b.to_string()),
4742        serde_json::Value::Null => IndexLabel::Utf8("null".to_owned()),
4743        other => IndexLabel::Utf8(other.to_string()),
4744    }
4745}
4746
4747fn json_value_to_column_name(value: &serde_json::Value) -> String {
4748    match value {
4749        serde_json::Value::String(s) => s.clone(),
4750        serde_json::Value::Number(n) => n.to_string(),
4751        serde_json::Value::Bool(b) => b.to_string(),
4752        serde_json::Value::Null => "null".to_owned(),
4753        other => other.to_string(),
4754    }
4755}
4756
4757fn json_key_to_index_label(value: &str) -> IndexLabel {
4758    value
4759        .parse::<i64>()
4760        .map(IndexLabel::Int64)
4761        .unwrap_or_else(|_| IndexLabel::Utf8(value.to_owned()))
4762}
4763
4764fn index_label_to_json(label: &IndexLabel) -> serde_json::Value {
4765    match label {
4766        IndexLabel::Int64(v) => serde_json::json!(*v),
4767        IndexLabel::Utf8(v) => serde_json::Value::String(v.clone()),
4768        // Epoch-millisecond ints, matching pandas to_json (date_unit='ms') and
4769        // the value path above — previously emitted raw nanoseconds, which
4770        // matched neither pandas nor FP's own value serialization.
4771        // (br-frankenpandas-lb0iu)
4772        IndexLabel::Timedelta64(ns) => serde_json::json!(*ns / 1_000_000),
4773        IndexLabel::Datetime64(ns) => serde_json::json!(*ns / 1_000_000),
4774        // pandas to_json renders a missing label as JSON null.
4775        IndexLabel::Null(_) => serde_json::Value::Null,
4776    }
4777}
4778
4779/// Stringified index label for use as a JSON object key (columns/index orients).
4780/// Temporal labels become epoch-millisecond strings, matching pandas to_json
4781/// (e.g. a 2020-01-01 index key is "1577836800000", not "2020-01-01 00:00:00").
4782/// (br-frankenpandas-lb0iu)
4783fn index_label_json_key(label: &IndexLabel) -> String {
4784    match label {
4785        IndexLabel::Datetime64(ns) | IndexLabel::Timedelta64(ns) => (*ns / 1_000_000).to_string(),
4786        other => other.to_string(),
4787    }
4788}
4789
/// Leading part of the placeholder column names used for row-MultiIndex
/// levels; level `n` is named `__index_level_{n}__`.
const SYNTHETIC_ROW_MULTIINDEX_PREFIX: &str = "__index_level_";
4791
4792fn index_label_to_scalar_value(label: &IndexLabel) -> Scalar {
4793    match label {
4794        IndexLabel::Int64(v) => Scalar::Int64(*v),
4795        IndexLabel::Utf8(v) => Scalar::Utf8(v.clone()),
4796        IndexLabel::Timedelta64(v) => Scalar::Timedelta64(*v),
4797        IndexLabel::Datetime64(v) => Scalar::Utf8(format_datetime_ns(*v)),
4798        // Typed-null label round-trips to the same-kind missing scalar.
4799        IndexLabel::Null(kind) => Scalar::Null(*kind),
4800    }
4801}
4802
4803fn synthetic_row_multiindex_names(nlevels: usize) -> Vec<String> {
4804    (0..nlevels)
4805        .map(|level| format!("{SYNTHETIC_ROW_MULTIINDEX_PREFIX}{level}__"))
4806        .collect()
4807}
4808
4809fn materialize_row_multiindex_columns(
4810    frame: &DataFrame,
4811    names: &[String],
4812) -> Result<DataFrame, IoError> {
4813    let Some(row_multiindex) = frame.row_multiindex() else {
4814        return Ok(frame.clone());
4815    };
4816
4817    let mut columns = BTreeMap::new();
4818    let mut column_order = Vec::with_capacity(names.len() + frame.column_names().len());
4819    for (level, name) in names.iter().enumerate() {
4820        let level_index = row_multiindex.get_level_values(level)?;
4821        let values = level_index
4822            .labels()
4823            .iter()
4824            .map(index_label_to_scalar_value)
4825            .collect::<Vec<_>>();
4826        columns.insert(name.clone(), Column::from_values(values)?);
4827        column_order.push(name.clone());
4828    }
4829
4830    for name in frame.column_names() {
4831        let column = frame
4832            .column(name)
4833            .ok_or_else(|| {
4834                IoError::Frame(FrameError::CompatibilityRejected(format!(
4835                    "column not found: '{name}'"
4836                )))
4837            })?
4838            .clone();
4839        columns.insert(name.clone(), column);
4840        column_order.push(name.clone());
4841    }
4842
4843    let index = Index::from_i64((0..frame.len() as i64).collect());
4844    DataFrame::new_with_column_order(index, columns, column_order).map_err(IoError::from)
4845}
4846
4847fn materialize_named_row_multiindex_columns(frame: &DataFrame) -> Result<DataFrame, IoError> {
4848    if frame.row_multiindex().is_some() {
4849        frame.reset_index(false).map_err(IoError::from)
4850    } else {
4851        Ok(frame.clone())
4852    }
4853}
4854
4855fn materialize_synthetic_row_multiindex_columns(frame: &DataFrame) -> Result<DataFrame, IoError> {
4856    let Some(row_multiindex) = frame.row_multiindex() else {
4857        return Ok(frame.clone());
4858    };
4859    let names = synthetic_row_multiindex_names(row_multiindex.nlevels());
4860    materialize_row_multiindex_columns(frame, &names)
4861}
4862
4863fn promote_frame_index_columns(
4864    frame: &DataFrame,
4865    index_cols: &[&str],
4866) -> Result<DataFrame, IoError> {
4867    if index_cols.is_empty() {
4868        return Ok(frame.clone());
4869    }
4870    if index_cols.len() == 1 {
4871        frame.set_index(index_cols[0], true).map_err(IoError::from)
4872    } else {
4873        frame
4874            .set_index_multi(index_cols, true, "|")
4875            .map_err(IoError::from)
4876    }
4877}
4878
4879fn detect_synthetic_row_multiindex_columns(frame: &DataFrame) -> Vec<String> {
4880    let mut out = Vec::new();
4881    for (level, name) in frame.column_names().iter().enumerate() {
4882        let expected = format!("{SYNTHETIC_ROW_MULTIINDEX_PREFIX}{level}__");
4883        if **name == expected {
4884            out.push(expected);
4885        } else {
4886            break;
4887        }
4888    }
4889    out
4890}
4891
4892fn promote_synthetic_row_multiindex_if_present(frame: &DataFrame) -> Result<DataFrame, IoError> {
4893    let synthetic_cols = detect_synthetic_row_multiindex_columns(frame);
4894    if synthetic_cols.len() < 2 {
4895        return Ok(frame.clone());
4896    }
4897    let refs = synthetic_cols
4898        .iter()
4899        .map(String::as_str)
4900        .collect::<Vec<_>>();
4901    promote_frame_index_columns(frame, &refs)
4902}
4903
4904pub fn read_json_str(input: &str, orient: JsonOrient) -> Result<DataFrame, IoError> {
4905    let parsed = parse_json_value_allowing_pandas_nan(input)?;
4906
4907    match orient {
4908        JsonOrient::Records => {
4909            let arr = parsed
4910                .as_array()
4911                .ok_or_else(|| IoError::JsonFormat("expected array for records orient".into()))?;
4912            if arr.is_empty() {
4913                return Ok(DataFrame::new(Index::new(Vec::new()), BTreeMap::new())?);
4914            }
4915
4916            // Collect column names from all records to handle heterogeneous keys
4917            let mut col_names_set = std::collections::BTreeSet::new();
4918            let mut col_names = Vec::new();
4919            for record in arr {
4920                let obj = record
4921                    .as_object()
4922                    .ok_or_else(|| IoError::JsonFormat("each record must be an object".into()))?;
4923                for key in obj.keys() {
4924                    if col_names_set.insert(key.clone()) {
4925                        col_names.push(key.clone());
4926                    }
4927                }
4928            }
4929
4930            let mut columns: BTreeMap<String, Vec<Scalar>> = BTreeMap::new();
4931            for name in &col_names {
4932                columns.insert(name.clone(), Vec::with_capacity(arr.len()));
4933            }
4934
4935            for record in arr {
4936                let obj = record
4937                    .as_object()
4938                    .ok_or_else(|| IoError::JsonFormat("each record must be an object".into()))?;
4939                for name in &col_names {
4940                    let val = obj.get(name).unwrap_or(&serde_json::Value::Null);
4941                    columns
4942                        .get_mut(name)
4943                        .ok_or_else(|| {
4944                            IoError::JsonFormat(format!(
4945                                "records orient missing column accumulator for '{name}'"
4946                            ))
4947                        })?
4948                        .push(json_value_to_scalar(val));
4949                }
4950            }
4951
4952            let row_count = arr.len() as i64;
4953            let mut out = BTreeMap::new();
4954            for (name, vals) in columns {
4955                out.insert(name, column_from_json_values(vals)?);
4956            }
4957            let index = Index::from_i64((0..row_count).collect());
4958            let frame = DataFrame::new_with_column_order(index, out, col_names)?;
4959            promote_synthetic_row_multiindex_if_present(&frame)
4960        }
4961        JsonOrient::Columns => {
4962            let obj = parsed
4963                .as_object()
4964                .ok_or_else(|| IoError::JsonFormat("expected object for columns orient".into()))?;
4965
4966            if obj.is_empty() {
4967                return Ok(DataFrame::new(Index::new(Vec::new()), BTreeMap::new())?);
4968            }
4969
4970            let mut raw_columns: BTreeMap<String, Vec<Scalar>> = BTreeMap::new();
4971            let mut column_order = Vec::with_capacity(obj.len());
4972            let mut index_labels = Vec::new();
4973            let mut index_lookup = BTreeMap::new();
4974            for (col_name, col_data) in obj {
4975                let col_obj = col_data.as_object().ok_or_else(|| {
4976                    IoError::JsonFormat("column data must be {index: val}".into())
4977                })?;
4978                let mut values = vec![Scalar::Null(NullKind::Null); index_labels.len()];
4979                for (label_key, val) in col_obj {
4980                    let label = json_key_to_index_label(label_key);
4981                    let row_idx = if let Some(&existing_idx) = index_lookup.get(&label) {
4982                        existing_idx
4983                    } else {
4984                        let next_idx = index_labels.len();
4985                        index_labels.push(label.clone());
4986                        index_lookup.insert(label, next_idx);
4987                        for existing_values in raw_columns.values_mut() {
4988                            existing_values.push(Scalar::Null(NullKind::Null));
4989                        }
4990                        values.push(Scalar::Null(NullKind::Null));
4991                        next_idx
4992                    };
4993                    if row_idx >= values.len() {
4994                        values.resize(index_labels.len(), Scalar::Null(NullKind::Null));
4995                    }
4996                    values[row_idx] = json_value_to_scalar(val);
4997                }
4998                if values.len() < index_labels.len() {
4999                    values.resize(index_labels.len(), Scalar::Null(NullKind::Null));
5000                }
5001                raw_columns.insert(col_name.clone(), values);
5002                column_order.push(col_name.clone());
5003            }
5004
5005            let mut out = BTreeMap::new();
5006            for (name, vals) in raw_columns {
5007                out.insert(name, column_from_json_values(vals)?);
5008            }
5009
5010            let frame =
5011                DataFrame::new_with_column_order(Index::new(index_labels), out, column_order)?;
5012            promote_synthetic_row_multiindex_if_present(&frame)
5013        }
5014        JsonOrient::Index => {
5015            let obj = parsed
5016                .as_object()
5017                .ok_or_else(|| IoError::JsonFormat("expected object for index orient".into()))?;
5018
5019            if obj.is_empty() {
5020                return Ok(DataFrame::new(Index::new(Vec::new()), BTreeMap::new())?);
5021            }
5022
5023            let mut index_labels = Vec::with_capacity(obj.len());
5024            let mut columns: BTreeMap<String, Vec<Scalar>> = BTreeMap::new();
5025            let mut column_order = Vec::new();
5026            let mut seen_columns = std::collections::HashSet::new();
5027
5028            for (row_label, row_data) in obj {
5029                let row_obj = row_data.as_object().ok_or_else(|| {
5030                    IoError::JsonFormat("index orient rows must be objects".into())
5031                })?;
5032
5033                let row_idx = index_labels.len();
5034
5035                // Pre-fill this row as null for all known columns, then overwrite present cells.
5036                for values in columns.values_mut() {
5037                    values.push(Scalar::Null(NullKind::Null));
5038                }
5039
5040                let parsed_label = row_label
5041                    .parse::<i64>()
5042                    .map(IndexLabel::Int64)
5043                    .unwrap_or_else(|_| IndexLabel::Utf8(row_label.clone()));
5044                index_labels.push(parsed_label);
5045
5046                for (col_name, value) in row_obj {
5047                    if seen_columns.insert(col_name.clone()) {
5048                        column_order.push(col_name.clone());
5049                    }
5050                    let scalar = json_value_to_scalar(value);
5051                    if let Some(values) = columns.get_mut(col_name) {
5052                        values[row_idx] = scalar;
5053                    } else {
5054                        let mut values = vec![Scalar::Null(NullKind::Null); row_idx + 1];
5055                        values[row_idx] = scalar;
5056                        columns.insert(col_name.clone(), values);
5057                    }
5058                }
5059            }
5060
5061            let mut out = BTreeMap::new();
5062            for (name, vals) in columns {
5063                out.insert(name, column_from_json_values(vals)?);
5064            }
5065            let frame =
5066                DataFrame::new_with_column_order(Index::new(index_labels), out, column_order)?;
5067            promote_synthetic_row_multiindex_if_present(&frame)
5068        }
5069        JsonOrient::Split => {
5070            let obj = parsed
5071                .as_object()
5072                .ok_or_else(|| IoError::JsonFormat("expected object for split orient".into()))?;
5073
5074            let col_names: Vec<String> = obj
5075                .get("columns")
5076                .and_then(|v| v.as_array())
5077                .ok_or_else(|| IoError::JsonFormat("split orient needs 'columns' array".into()))?
5078                .iter()
5079                .map(json_value_to_column_name)
5080                .collect();
5081            reject_duplicate_headers(&col_names)?;
5082
5083            let data = obj
5084                .get("data")
5085                .and_then(|v| v.as_array())
5086                .ok_or_else(|| IoError::JsonFormat("split orient needs 'data' array".into()))?;
5087
5088            let explicit_index = obj
5089                .get("index")
5090                .map(|v| {
5091                    v.as_array()
5092                        .ok_or_else(|| {
5093                            IoError::JsonFormat("split orient 'index' must be an array".into())
5094                        })
5095                        .map(|arr| {
5096                            arr.iter()
5097                                .map(json_value_to_index_label)
5098                                .collect::<Vec<_>>()
5099                        })
5100                })
5101                .transpose()?;
5102
5103            let mut columns: BTreeMap<String, Vec<Scalar>> = BTreeMap::new();
5104            for name in &col_names {
5105                columns.insert(name.clone(), Vec::with_capacity(data.len()));
5106            }
5107
5108            for (row_idx, row) in data.iter().enumerate() {
5109                let arr = row
5110                    .as_array()
5111                    .ok_or_else(|| IoError::JsonFormat("each data row must be an array".into()))?;
5112                if arr.len() != col_names.len() {
5113                    return Err(IoError::JsonFormat(format!(
5114                        "split orient row {row_idx} length ({}) does not match columns length ({})",
5115                        arr.len(),
5116                        col_names.len()
5117                    )));
5118                }
5119                for (i, name) in col_names.iter().enumerate() {
5120                    let val = arr.get(i).unwrap_or(&serde_json::Value::Null);
5121                    columns
5122                        .get_mut(name)
5123                        .ok_or_else(|| {
5124                            IoError::JsonFormat(format!(
5125                                "split orient missing column accumulator for '{name}'"
5126                            ))
5127                        })?
5128                        .push(json_value_to_scalar(val));
5129                }
5130            }
5131
5132            let row_count = data.len() as i64;
5133            let mut out = BTreeMap::new();
5134            for (name, vals) in columns {
5135                out.insert(name, column_from_json_values(vals)?);
5136            }
5137            let index = match explicit_index {
5138                Some(labels) => {
5139                    if labels.len() != row_count as usize {
5140                        return Err(IoError::JsonFormat(format!(
5141                            "split orient index length ({}) must match data row count ({row_count})",
5142                            labels.len()
5143                        )));
5144                    }
5145                    Index::new(labels)
5146                }
5147                None => Index::from_i64((0..row_count).collect()),
5148            };
5149            let frame = DataFrame::new_with_column_order(index, out, col_names)?;
5150            promote_synthetic_row_multiindex_if_present(&frame)
5151        }
5152        JsonOrient::Values => {
5153            let rows = parsed
5154                .as_array()
5155                .ok_or_else(|| IoError::JsonFormat("expected array for values orient".into()))?;
5156
5157            if rows.is_empty() {
5158                return Ok(DataFrame::new(Index::new(Vec::new()), BTreeMap::new())?);
5159            }
5160
5161            let mut width = 0usize;
5162            for row in rows {
5163                let arr = row.as_array().ok_or_else(|| {
5164                    IoError::JsonFormat("each values row must be an array".into())
5165                })?;
5166                width = width.max(arr.len());
5167            }
5168
5169            let column_order: Vec<String> = (0..width).map(|idx| idx.to_string()).collect();
5170            let mut columns: BTreeMap<String, Vec<Scalar>> = column_order
5171                .iter()
5172                .cloned()
5173                .map(|name| (name, Vec::with_capacity(rows.len())))
5174                .collect();
5175
5176            for row in rows {
5177                let arr = row.as_array().ok_or_else(|| {
5178                    IoError::JsonFormat("each values row must be an array".into())
5179                })?;
5180                for (col_idx, name) in column_order.iter().enumerate() {
5181                    let val = arr.get(col_idx).unwrap_or(&serde_json::Value::Null);
5182                    columns
5183                        .get_mut(name)
5184                        .ok_or_else(|| {
5185                            IoError::JsonFormat(format!(
5186                                "values orient missing column accumulator for '{name}'"
5187                            ))
5188                        })?
5189                        .push(json_value_to_scalar(val));
5190                }
5191            }
5192
5193            let mut out = BTreeMap::new();
5194            for (name, vals) in columns {
5195                out.insert(name, column_from_json_values(vals)?);
5196            }
5197            let index = Index::from_i64((0..rows.len() as i64).collect());
5198            let frame = DataFrame::new_with_column_order(index, out, column_order)?;
5199            promote_synthetic_row_multiindex_if_present(&frame)
5200        }
5201    }
5202}
5203
5204pub fn write_json_string(frame: &DataFrame, orient: JsonOrient) -> Result<String, IoError> {
5205    if frame.row_multiindex().is_some() && orient != JsonOrient::Values {
5206        let materialized = materialize_synthetic_row_multiindex_columns(frame)?;
5207        return write_json_string(&materialized, orient);
5208    }
5209
5210    let headers: Vec<String> = frame.column_names().into_iter().cloned().collect();
5211    let row_count = frame.index().len();
5212    let column_float_promotions = headers
5213        .iter()
5214        .map(|name| {
5215            frame
5216                .column(name)
5217                .is_some_and(|column| column_promotes_int_json_values_to_float(column.values()))
5218        })
5219        .collect::<Vec<_>>();
5220
5221    match orient {
5222        JsonOrient::Records => {
5223            let mut records = Vec::with_capacity(row_count);
5224            for row_idx in 0..row_count {
5225                let mut obj = serde_json::Map::new();
5226                for (name, promote_int_to_float) in
5227                    headers.iter().zip(column_float_promotions.iter())
5228                {
5229                    let val = frame
5230                        .column(name)
5231                        .and_then(|c| c.value(row_idx))
5232                        .map(|value| {
5233                            scalar_to_json_with_column_promotion(value, *promote_int_to_float)
5234                        })
5235                        .unwrap_or(serde_json::Value::Null);
5236                    obj.insert(name.clone(), val);
5237                }
5238                records.push(serde_json::Value::Object(obj));
5239            }
5240            Ok(serde_json::to_string(&records)?)
5241        }
5242        JsonOrient::Columns => {
5243            let mut outer = serde_json::Map::new();
5244            for (name, promote_int_to_float) in headers.iter().zip(column_float_promotions.iter()) {
5245                let mut col_obj = serde_json::Map::new();
5246                if let Some(col) = frame.column(name) {
5247                    for (label, val) in frame.index().labels().iter().zip(col.values()) {
5248                        let key = index_label_json_key(label);
5249                        if col_obj
5250                            .insert(
5251                                key.clone(),
5252                                scalar_to_json_with_column_promotion(val, *promote_int_to_float),
5253                            )
5254                            .is_some()
5255                        {
5256                            return Err(IoError::JsonFormat(format!(
5257                                "columns orient cannot encode duplicate index label key: {key}"
5258                            )));
5259                        }
5260                    }
5261                }
5262                outer.insert(name.clone(), serde_json::Value::Object(col_obj));
5263            }
5264            Ok(serde_json::to_string(&serde_json::Value::Object(outer))?)
5265        }
5266        JsonOrient::Index => {
5267            let mut outer = serde_json::Map::new();
5268            for row_idx in 0..row_count {
5269                let mut row_obj = serde_json::Map::new();
5270                for (name, promote_int_to_float) in
5271                    headers.iter().zip(column_float_promotions.iter())
5272                {
5273                    let val = frame
5274                        .column(name)
5275                        .and_then(|c| c.value(row_idx))
5276                        .map(|value| {
5277                            scalar_to_json_with_column_promotion(value, *promote_int_to_float)
5278                        })
5279                        .unwrap_or(serde_json::Value::Null);
5280                    row_obj.insert(name.clone(), val);
5281                }
5282
5283                let row_label = index_label_json_key(&frame.index().labels()[row_idx]);
5284                if outer
5285                    .insert(row_label.clone(), serde_json::Value::Object(row_obj))
5286                    .is_some()
5287                {
5288                    return Err(IoError::JsonFormat(format!(
5289                        "index orient cannot encode duplicate index label key: {row_label}"
5290                    )));
5291                }
5292            }
5293            Ok(serde_json::to_string(&serde_json::Value::Object(outer))?)
5294        }
5295        JsonOrient::Split => {
5296            let col_array: Vec<serde_json::Value> = headers
5297                .iter()
5298                .map(|h| serde_json::Value::String(h.clone()))
5299                .collect();
5300            let index_array: Vec<serde_json::Value> = frame
5301                .index()
5302                .labels()
5303                .iter()
5304                .map(index_label_to_json)
5305                .collect();
5306
5307            let mut data = Vec::with_capacity(row_count);
5308            for row_idx in 0..row_count {
5309                let row: Vec<serde_json::Value> = headers
5310                    .iter()
5311                    .zip(column_float_promotions.iter())
5312                    .map(|(name, promote_int_to_float)| {
5313                        frame
5314                            .column(name)
5315                            .and_then(|c| c.value(row_idx))
5316                            .map(|value| {
5317                                scalar_to_json_with_column_promotion(value, *promote_int_to_float)
5318                            })
5319                            .unwrap_or(serde_json::Value::Null)
5320                    })
5321                    .collect();
5322                data.push(serde_json::Value::Array(row));
5323            }
5324
5325            let mut obj = serde_json::Map::new();
5326            obj.insert("columns".into(), serde_json::Value::Array(col_array));
5327            obj.insert("index".into(), serde_json::Value::Array(index_array));
5328            obj.insert("data".into(), serde_json::Value::Array(data));
5329            Ok(serde_json::to_string(&serde_json::Value::Object(obj))?)
5330        }
5331        JsonOrient::Values => {
5332            let mut data = Vec::with_capacity(row_count);
5333            for row_idx in 0..row_count {
5334                let row: Vec<serde_json::Value> = headers
5335                    .iter()
5336                    .zip(column_float_promotions.iter())
5337                    .map(|(name, promote_int_to_float)| {
5338                        frame
5339                            .column(name)
5340                            .and_then(|c| c.value(row_idx))
5341                            .map(|value| {
5342                                scalar_to_json_with_column_promotion(value, *promote_int_to_float)
5343                            })
5344                            .unwrap_or(serde_json::Value::Null)
5345                    })
5346                    .collect();
5347                data.push(serde_json::Value::Array(row));
5348            }
5349            Ok(serde_json::to_string(&serde_json::Value::Array(data))?)
5350        }
5351    }
5352}
5353
5354// ── File-based JSON ────────────────────────────────────────────────────
5355
5356pub fn read_json(path: &Path, orient: JsonOrient) -> Result<DataFrame, IoError> {
5357    let content = std::fs::read_to_string(path)?;
5358    read_json_str(&content, orient)
5359}
5360
5361pub fn write_json(frame: &DataFrame, path: &Path, orient: JsonOrient) -> Result<(), IoError> {
5362    let content = write_json_string(frame, orient)?;
5363    std::fs::write(path, content)?;
5364    Ok(())
5365}
5366
5367// ── File-based Pickle ──────────────────────────────────────────────────
5368
5369/// Read a DataFrame from a Pickle file.
5370pub fn read_pickle(path: &Path) -> Result<DataFrame, IoError> {
5371    read_pickle_with_options(path, &PickleReadOptions::default())
5372}
5373
5374/// Read a DataFrame from a Pickle file with options.
5375pub fn read_pickle_with_options(
5376    path: &Path,
5377    options: &PickleReadOptions,
5378) -> Result<DataFrame, IoError> {
5379    let content = std::fs::read(path)?;
5380    read_pickle_bytes_with_options(&content, options)
5381}
5382
5383/// Write a DataFrame to a Pickle file.
5384pub fn write_pickle(frame: &DataFrame, path: &Path) -> Result<(), IoError> {
5385    write_pickle_with_options(frame, path, &PickleWriteOptions::default())
5386}
5387
5388/// Write a DataFrame to a Pickle file with options.
5389pub fn write_pickle_with_options(
5390    frame: &DataFrame,
5391    path: &Path,
5392    options: &PickleWriteOptions,
5393) -> Result<(), IoError> {
5394    let content = write_pickle_bytes_with_options(frame, options)?;
5395    std::fs::write(path, content)?;
5396    Ok(())
5397}
5398
5399// ── File-based HDF5 ────────────────────────────────────────────────────
5400
5401/// Read a DataFrame from the default HDF5 key.
5402pub fn read_hdf(path: &Path) -> Result<DataFrame, IoError> {
5403    read_hdf_with_options(path, &HdfReadOptions::default())
5404}
5405
5406/// Read a DataFrame from an explicit HDF5 key.
5407pub fn read_hdf_key(path: &Path, key: &str) -> Result<DataFrame, IoError> {
5408    read_hdf_with_options(
5409        path,
5410        &HdfReadOptions {
5411            key: key.to_owned(),
5412        },
5413    )
5414}
5415
/// Read a DataFrame from an HDF5 file with options.
///
/// The key in `options` is validated by `normalize_hdf5_key`. The raw `u8`
/// dataset located at the path built by `hdf5_payload_path` is then read and
/// decoded with `read_pickle_bytes`.
///
/// # Errors
///
/// Returns `IoError::Hdf5` when the key is rejected, the file cannot be
/// opened, the payload dataset is missing or unreadable, or the payload does
/// not decode.
#[cfg(feature = "hdf5")]
pub fn read_hdf_with_options(path: &Path, options: &HdfReadOptions) -> Result<DataFrame, IoError> {
    let key = normalize_hdf5_key(&options.key)?;
    let dataset_path = hdf5_payload_path(&key);

    let file = Hdf5File::open(path).map_err(hdf5_error)?;
    let missing_dataset = |err| {
        IoError::Hdf5(format!(
            "missing FrankenPandas payload dataset '{dataset_path}': {err}"
        ))
    };
    let payload = file
        .dataset(&dataset_path)
        .map_err(missing_dataset)?
        .read_raw::<u8>()
        .map_err(hdf5_error)?;

    // Decode failures are re-labelled as HDF5 errors so the message names the key.
    read_pickle_bytes(&payload).map_err(|err| {
        IoError::Hdf5(format!(
            "invalid FrankenPandas payload at key '{key}': {err}"
        ))
    })
}
5434
/// Read a DataFrame from an HDF5 file with options.
///
/// Stub compiled when the `hdf5` feature is off: both arguments are ignored
/// and the call always returns the `IoError::Hdf5` produced by
/// `hdf5_feature_disabled`.
#[cfg(not(feature = "hdf5"))]
pub fn read_hdf_with_options(
    _path: &Path,
    _options: &HdfReadOptions,
) -> Result<DataFrame, IoError> {
    hdf5_feature_disabled()
}
5443
5444/// Write a DataFrame to the default HDF5 key.
5445pub fn write_hdf(frame: &DataFrame, path: &Path) -> Result<(), IoError> {
5446    write_hdf_with_options(frame, path, &HdfWriteOptions::default())
5447}
5448
5449/// Write a Series to the default HDF5 key.
5450///
5451/// Converts the Series to a single-column DataFrame and writes it.
5452pub fn write_hdf_series(series: &Series, path: &Path) -> Result<(), IoError> {
5453    let frame = series
5454        .to_frame(Some(series.name()))
5455        .map_err(|e| IoError::Hdf5(format!("Series to DataFrame conversion: {e}")))?;
5456    write_hdf(&frame, path)
5457}
5458
5459/// Write a Series to an explicit HDF5 key.
5460pub fn write_hdf_series_key(series: &Series, path: &Path, key: &str) -> Result<(), IoError> {
5461    let frame = series
5462        .to_frame(Some(series.name()))
5463        .map_err(|e| IoError::Hdf5(format!("Series to DataFrame conversion: {e}")))?;
5464    write_hdf_key(&frame, path, key)
5465}
5466
5467/// Write a DataFrame to an explicit HDF5 key.
5468pub fn write_hdf_key(frame: &DataFrame, path: &Path, key: &str) -> Result<(), IoError> {
5469    write_hdf_with_options(
5470        frame,
5471        path,
5472        &HdfWriteOptions {
5473            key: key.to_owned(),
5474        },
5475    )
5476}
5477
/// Write a DataFrame to an HDF5 file with options.
///
/// The key in `options` is validated by `normalize_hdf5_key` and the frame is
/// encoded with `write_pickle_bytes` before the file is created. The encoded
/// bytes are stored as the `HDF5_PAYLOAD_DATASET` dataset inside a group named
/// after the normalized key, and the file is flushed before returning.
///
/// NOTE(review): `Hdf5File::create` presumably truncates an existing file, in
/// which case writing a second key to the same path would discard keys
/// written earlier — confirm against the hdf5 crate before relying on
/// multi-key files.
///
/// # Errors
///
/// Returns `IoError::Hdf5` for a rejected key or any HDF5 failure; encoding
/// errors from `write_pickle_bytes` are propagated.
#[cfg(feature = "hdf5")]
pub fn write_hdf_with_options(
    frame: &DataFrame,
    path: &Path,
    options: &HdfWriteOptions,
) -> Result<(), IoError> {
    let group_name = normalize_hdf5_key(&options.key)?;
    let snapshot = write_pickle_bytes(frame)?;

    let file = Hdf5File::create(path).map_err(hdf5_error)?;
    let group = file.create_group(&group_name).map_err(hdf5_error)?;
    group
        .new_dataset_builder()
        .with_data(snapshot.as_slice())
        .create(HDF5_PAYLOAD_DATASET)
        .map_err(hdf5_error)?;

    file.flush().map_err(hdf5_error)
}
5497
/// Write a DataFrame to an HDF5 file with options.
///
/// Stub compiled when the `hdf5` feature is off: all arguments are ignored,
/// nothing is written, and the call always returns the `IoError::Hdf5`
/// produced by `hdf5_feature_disabled`.
#[cfg(not(feature = "hdf5"))]
pub fn write_hdf_with_options(
    _frame: &DataFrame,
    _path: &Path,
    _options: &HdfWriteOptions,
) -> Result<(), IoError> {
    hdf5_feature_disabled()
}
5507
/// Validate an HDF5 key and return it with leading and trailing `/` removed.
///
/// # Errors
///
/// Returns `IoError::Hdf5` when:
/// - nothing is left after trimming (the key names the root),
/// - any `/`-separated segment is empty, `.` or `..`,
/// - any segment equals `HDF5_PAYLOAD_DATASET`, the name reserved for the
///   payload dataset.
///
/// Segments are checked left to right; the first offending segment decides
/// which error is reported.
#[cfg(feature = "hdf5")]
fn normalize_hdf5_key(key: &str) -> Result<String, IoError> {
    let trimmed = key.trim_matches('/');
    if trimmed.is_empty() {
        return Err(IoError::Hdf5(
            "hdf5 key must name a non-root group".to_owned(),
        ));
    }

    for segment in trimmed.split('/') {
        match segment {
            "" | "." | ".." => {
                return Err(IoError::Hdf5(format!("invalid hdf5 key '{key}'")));
            }
            reserved if reserved == HDF5_PAYLOAD_DATASET => {
                return Err(IoError::Hdf5(format!(
                    "hdf5 key '{key}' uses reserved FrankenPandas dataset name"
                )));
            }
            _ => {}
        }
    }

    Ok(String::from(trimmed))
}
5530
/// Build the dataset path `<key>/<HDF5_PAYLOAD_DATASET>` for a normalized key.
#[cfg(feature = "hdf5")]
fn hdf5_payload_path(key: &str) -> String {
    [key, HDF5_PAYLOAD_DATASET].join("/")
}
5535
/// Convert an `hdf5::Error` into `IoError::Hdf5`, keeping only its display text.
#[cfg(feature = "hdf5")]
fn hdf5_error(err: hdf5::Error) -> IoError {
    IoError::Hdf5(err.to_string())
}
5540
5541#[cfg(not(feature = "hdf5"))]
5542fn hdf5_feature_disabled<T>() -> Result<T, IoError> {
5543    Err(IoError::Hdf5(
5544        "hdf5 support is disabled; enable the fp-io `hdf5` feature".to_owned(),
5545    ))
5546}
5547
5548// ── File-based Stata ───────────────────────────────────────────────────
5549
5550/// Read a DataFrame from a Stata DTA file.
5551pub fn read_stata(path: &Path) -> Result<DataFrame, IoError> {
5552    let content = std::fs::read(path)?;
5553    read_stata_bytes(&content)
5554}
5555
5556/// Write a DataFrame to a Stata DTA file.
5557pub fn write_stata(frame: &DataFrame, path: &Path) -> Result<(), IoError> {
5558    write_stata_with_options(frame, path, &StataWriteOptions::default())
5559}
5560
5561/// Write a DataFrame to a Stata DTA file with explicit options.
5562pub fn write_stata_with_options(
5563    frame: &DataFrame,
5564    path: &Path,
5565    options: &StataWriteOptions,
5566) -> Result<(), IoError> {
5567    let content = write_stata_bytes_with_options(frame, options)?;
5568    std::fs::write(path, content)?;
5569    Ok(())
5570}
5571
5572// ── JSONL (JSON Lines) I/O ──────────────────────────────────────────────
5573
5574/// Write a DataFrame to JSONL (JSON Lines) format.
5575///
5576/// Matches `pd.DataFrame.to_json(orient='records', lines=True)`.
5577/// Each row is written as a separate JSON object on its own line,
5578/// with no enclosing array. This format is standard for streaming
5579/// data pipelines and log processing.
5580pub fn write_jsonl_string(frame: &DataFrame) -> Result<String, IoError> {
5581    let headers: Vec<String> = frame.column_names().into_iter().cloned().collect();
5582    let row_count = frame.index().len();
5583    let column_float_promotions = headers
5584        .iter()
5585        .map(|name| {
5586            frame
5587                .column(name)
5588                .is_some_and(|column| column_promotes_int_json_values_to_float(column.values()))
5589        })
5590        .collect::<Vec<_>>();
5591
5592    let mut lines = Vec::with_capacity(row_count);
5593    for row_idx in 0..row_count {
5594        let mut obj = serde_json::Map::new();
5595        for (name, promote_int_to_float) in headers.iter().zip(column_float_promotions.iter()) {
5596            let val = frame
5597                .column(name)
5598                .and_then(|c| c.value(row_idx))
5599                .map(|value| scalar_to_json_with_column_promotion(value, *promote_int_to_float))
5600                .unwrap_or(serde_json::Value::Null);
5601            obj.insert(name.clone(), val);
5602        }
5603        lines.push(serde_json::to_string(&serde_json::Value::Object(obj))?);
5604    }
5605
5606    Ok(lines.join("\n"))
5607}
5608
5609/// Read a DataFrame from JSONL (JSON Lines) format.
5610///
5611/// Matches `pd.read_json(input, lines=True)`.
5612/// Each line must be a valid JSON object with the same keys.
5613/// Per br-frankenpandas-9l8gd: row cap to prevent DoS via hostile input.
5614/// Hostile JSONL with billions of lines would otherwise grow `all_rows`
5615/// unbounded before the column-build allocation.
5616const READ_JSONL_MAX_ROWS: usize = 100_000_000;
5617
5618pub fn read_jsonl_str(input: &str) -> Result<DataFrame, IoError> {
5619    let mut all_rows: Vec<serde_json::Map<String, serde_json::Value>> = Vec::new();
5620
5621    for line in input.lines() {
5622        let trimmed = line.trim();
5623        if trimmed.is_empty() {
5624            continue;
5625        }
5626        // Per br-frankenpandas-9l8gd: reject hostile inputs that would
5627        // exhaust memory before the column-build allocation step.
5628        if all_rows.len() >= READ_JSONL_MAX_ROWS {
5629            return Err(IoError::JsonFormat(format!(
5630                "JSONL input exceeds maximum of {READ_JSONL_MAX_ROWS} rows"
5631            )));
5632        }
5633        let parsed = parse_json_value_allowing_pandas_nan(trimmed)?;
5634        let obj = parsed
5635            .as_object()
5636            .ok_or_else(|| IoError::JsonFormat("JSONL: each line must be a JSON object".into()))?;
5637        all_rows.push(obj.clone());
5638    }
5639
5640    if all_rows.is_empty() {
5641        return DataFrame::new(Index::new(Vec::new()), BTreeMap::new()).map_err(IoError::Frame);
5642    }
5643
5644    // Collect column names as the UNION of all keys across all rows.
5645    // This matches pandas behavior: missing keys in a row become null.
5646    let mut col_name_set = std::collections::BTreeSet::new();
5647    let mut col_names_ordered: Vec<String> = Vec::new();
5648    for row in &all_rows {
5649        for key in row.keys() {
5650            if col_name_set.insert(key.clone()) {
5651                col_names_ordered.push(key.clone());
5652            }
5653        }
5654    }
5655    let col_names = col_names_ordered;
5656    let mut columns: Vec<Vec<Scalar>> = col_names
5657        .iter()
5658        .map(|_| Vec::with_capacity(all_rows.len()))
5659        .collect();
5660
5661    for row in &all_rows {
5662        for (col_idx, name) in col_names.iter().enumerate() {
5663            let val = row.get(name).unwrap_or(&serde_json::Value::Null);
5664            columns[col_idx].push(json_value_to_scalar(val));
5665        }
5666    }
5667
5668    let mut out_columns = BTreeMap::new();
5669    let mut column_order = Vec::new();
5670    for (name, values) in col_names.into_iter().zip(columns) {
5671        out_columns.insert(name.clone(), column_from_json_values(values)?);
5672        column_order.push(name);
5673    }
5674
5675    let index = Index::from_i64((0..all_rows.len() as i64).collect());
5676    Ok(DataFrame::new_with_column_order(
5677        index,
5678        out_columns,
5679        column_order,
5680    )?)
5681}
5682
5683/// Write a DataFrame to a JSONL file.
5684pub fn write_jsonl(frame: &DataFrame, path: &Path) -> Result<(), IoError> {
5685    let content = write_jsonl_string(frame)?;
5686    std::fs::write(path, content)?;
5687    Ok(())
5688}
5689
5690/// Read a DataFrame from a JSONL file.
5691pub fn read_jsonl(path: &Path) -> Result<DataFrame, IoError> {
5692    let content = std::fs::read_to_string(path)?;
5693    read_jsonl_str(&content)
5694}
5695
5696// ── Parquet I/O ─────────────────────────────────────────────────────────────
5697
5698/// Convert an fp-types DType to an Arrow DataType.
5699fn dtype_to_arrow(dtype: DType) -> ArrowDataType {
5700    match dtype {
5701        DType::Int64 | DType::Int64Nullable => ArrowDataType::Int64,
5702        DType::Float64 => ArrowDataType::Float64,
5703        DType::Utf8 => ArrowDataType::Utf8,
5704        DType::Categorical => ArrowDataType::Utf8,
5705        DType::Bool | DType::BoolNullable => ArrowDataType::Boolean,
5706        DType::Null => ArrowDataType::Utf8, // fallback: null-only columns as string
5707        DType::Timedelta64 => ArrowDataType::Int64, // store as nanoseconds
5708        DType::Datetime64 => ArrowDataType::Int64, // store as nanoseconds
5709        DType::Period => ArrowDataType::Int64, // store as ordinal
5710        DType::Interval => ArrowDataType::Utf8, // store as string until arrow interval lands
5711        DType::Sparse => ArrowDataType::Utf8, // marker fallback until sparse arrays land
5712    }
5713}
5714
5715fn column_to_arrow_array(column: &Column) -> Result<Arc<dyn Array>, IoError> {
5716    let arr: Arc<dyn Array> = match column.dtype() {
5717        DType::Int64 | DType::Int64Nullable => {
5718            let mut builder = Int64Builder::with_capacity(column.len());
5719            for value in column.values() {
5720                match value {
5721                    Scalar::Int64(n) => builder.append_value(*n),
5722                    _ if value.is_missing() => builder.append_null(),
5723                    _ => builder.append_null(),
5724                }
5725            }
5726            Arc::new(builder.finish())
5727        }
5728        DType::Float64 => {
5729            let mut builder = Float64Builder::with_capacity(column.len());
5730            for value in column.values() {
5731                match value {
5732                    Scalar::Float64(n) => {
5733                        if n.is_nan() {
5734                            builder.append_null();
5735                        } else {
5736                            builder.append_value(*n);
5737                        }
5738                    }
5739                    _ if value.is_missing() => builder.append_null(),
5740                    _ => builder.append_null(),
5741                }
5742            }
5743            Arc::new(builder.finish())
5744        }
5745        DType::Bool | DType::BoolNullable => {
5746            let mut builder = BooleanBuilder::with_capacity(column.len());
5747            for value in column.values() {
5748                match value {
5749                    Scalar::Bool(flag) => builder.append_value(*flag),
5750                    _ if value.is_missing() => builder.append_null(),
5751                    _ => builder.append_null(),
5752                }
5753            }
5754            Arc::new(builder.finish())
5755        }
5756        DType::Utf8 | DType::Categorical | DType::Null | DType::Sparse => {
5757            let mut builder = StringBuilder::with_capacity(column.len(), column.len() * 8);
5758            for value in column.values() {
5759                match value {
5760                    Scalar::Utf8(text) => builder.append_value(text),
5761                    _ if value.is_missing() => builder.append_null(),
5762                    _ => builder.append_value(format!("{value:?}")),
5763                }
5764            }
5765            Arc::new(builder.finish())
5766        }
5767        DType::Timedelta64 => {
5768            let mut builder = Int64Builder::with_capacity(column.len());
5769            for value in column.values() {
5770                match value {
5771                    Scalar::Timedelta64(nanos) => {
5772                        if *nanos == Timedelta::NAT {
5773                            builder.append_null();
5774                        } else {
5775                            builder.append_value(*nanos);
5776                        }
5777                    }
5778                    _ if value.is_missing() => builder.append_null(),
5779                    _ => builder.append_null(),
5780                }
5781            }
5782            Arc::new(builder.finish())
5783        }
5784        DType::Datetime64 => {
5785            let mut builder = Int64Builder::with_capacity(column.len());
5786            for value in column.values() {
5787                match value {
5788                    Scalar::Datetime64(nanos) => {
5789                        if *nanos == Timestamp::NAT {
5790                            builder.append_null();
5791                        } else {
5792                            builder.append_value(*nanos);
5793                        }
5794                    }
5795                    _ if value.is_missing() => builder.append_null(),
5796                    _ => builder.append_null(),
5797                }
5798            }
5799            Arc::new(builder.finish())
5800        }
5801        DType::Period => {
5802            let mut builder = Int64Builder::with_capacity(column.len());
5803            for value in column.values() {
5804                match value {
5805                    Scalar::Period(ordinal) => {
5806                        if *ordinal == i64::MIN {
5807                            builder.append_null();
5808                        } else {
5809                            builder.append_value(*ordinal);
5810                        }
5811                    }
5812                    _ if value.is_missing() => builder.append_null(),
5813                    _ => builder.append_null(),
5814                }
5815            }
5816            Arc::new(builder.finish())
5817        }
5818        DType::Interval => {
5819            let mut builder = StringBuilder::with_capacity(column.len(), column.len() * 32);
5820            for value in column.values() {
5821                match value {
5822                    Scalar::Interval(iv) => builder.append_value(format!("{iv}")),
5823                    _ if value.is_missing() => builder.append_null(),
5824                    _ => builder.append_null(),
5825                }
5826            }
5827            Arc::new(builder.finish())
5828        }
5829    };
5830
5831    Ok(arr)
5832}
5833
5834/// Convert a Series to its Arrow data type plus backing array.
5835///
5836/// This is the Arrow-level building block under Feather / IPC round-trips and
5837/// preserves nullable Int64 columns as Arrow null-bitmaps rather than coercing
5838/// through Float64.
5839pub fn series_to_arrow_array(series: &Series) -> Result<(ArrowDataType, Arc<dyn Array>), IoError> {
5840    let dt = dtype_to_arrow(series.column().dtype());
5841    Ok((dt, column_to_arrow_array(series.column())?))
5842}
5843
5844/// Rebuild a Series from an Arrow array and explicit dtype metadata.
5845pub fn series_from_arrow_array(
5846    name: impl Into<String>,
5847    index_labels: Vec<IndexLabel>,
5848    arr: &dyn Array,
5849    dt: &ArrowDataType,
5850) -> Result<Series, IoError> {
5851    let values = arrow_array_to_scalars(arr, dt)?;
5852    Series::from_values(name, index_labels, values).map_err(IoError::from)
5853}
5854
5855/// Build an Arrow RecordBatch from a DataFrame.
5856fn dataframe_to_record_batch(frame: &DataFrame) -> Result<RecordBatch, IoError> {
5857    let materialized = if frame.row_multiindex().is_some() {
5858        Some(materialize_synthetic_row_multiindex_columns(frame)?)
5859    } else {
5860        None
5861    };
5862    let frame = materialized.as_ref().unwrap_or(frame);
5863
5864    let col_names: Vec<String> = frame.column_names().into_iter().cloned().collect();
5865    let mut fields = Vec::with_capacity(col_names.len());
5866    let mut arrays: Vec<Arc<dyn Array>> = Vec::with_capacity(col_names.len());
5867
5868    for name in &col_names {
5869        let col = frame
5870            .column(name)
5871            .ok_or_else(|| IoError::Parquet(format!("missing column: {name}")))?;
5872        let dt = col.dtype();
5873        fields.push(Field::new(name.as_str(), dtype_to_arrow(dt), true));
5874        let arr = column_to_arrow_array(col)?;
5875        arrays.push(arr);
5876    }
5877
5878    let schema = Arc::new(Schema::new(fields));
5879    RecordBatch::try_new(schema, arrays).map_err(|e| IoError::Parquet(e.to_string()))
5880}
5881
5882/// Convert an Arrow RecordBatch back into a DataFrame.
5883fn record_batch_to_dataframe(batch: &RecordBatch) -> Result<DataFrame, IoError> {
5884    let n_rows = batch.num_rows();
5885    let schema = batch.schema();
5886    let mut columns = BTreeMap::new();
5887    let mut col_order = Vec::new();
5888
5889    for (i, field) in schema.fields().iter().enumerate() {
5890        let name = field.name().clone();
5891        let arr = batch.column(i);
5892        let values = arrow_array_to_scalars(arr.as_ref(), field.data_type())?;
5893        let dtype = fp_dtype_for_arrow_data_type(field.data_type());
5894        let col = Column::new(dtype, values)?;
5895        columns.insert(name.clone(), col);
5896        col_order.push(name);
5897    }
5898
5899    let labels: Vec<IndexLabel> = (0..n_rows).map(|i| IndexLabel::Int64(i as i64)).collect();
5900    let index = Index::new(labels);
5901
5902    let frame = DataFrame::new_with_column_order(index, columns, col_order)?;
5903    promote_synthetic_row_multiindex_if_present(&frame)
5904}
5905
5906fn fp_dtype_for_arrow_data_type(dt: &ArrowDataType) -> DType {
5907    match dt {
5908        ArrowDataType::Int8
5909        | ArrowDataType::Int16
5910        | ArrowDataType::Int32
5911        | ArrowDataType::Int64
5912        | ArrowDataType::UInt8
5913        | ArrowDataType::UInt16
5914        | ArrowDataType::UInt32
5915        | ArrowDataType::UInt64 => DType::Int64,
5916        ArrowDataType::Float16 | ArrowDataType::Float32 | ArrowDataType::Float64 => DType::Float64,
5917        ArrowDataType::Boolean => DType::Bool,
5918        ArrowDataType::Utf8
5919        | ArrowDataType::LargeUtf8
5920        | ArrowDataType::Date32
5921        | ArrowDataType::Date64
5922        | ArrowDataType::Timestamp(_, _) => DType::Utf8,
5923        _ => DType::Utf8,
5924    }
5925}
5926
5927/// Convert an Arrow array + data type to a Vec of Scalars.
5928fn arrow_array_to_scalars(arr: &dyn Array, dt: &ArrowDataType) -> Result<Vec<Scalar>, IoError> {
5929    let len = arr.len();
5930    let mut scalars = Vec::with_capacity(len);
5931
5932    match dt {
5933        ArrowDataType::Int64 => {
5934            let typed = arr
5935                .as_any()
5936                .downcast_ref::<Int64Array>()
5937                .ok_or_else(|| IoError::Parquet("expected Int64Array".into()))?;
5938            for i in 0..len {
5939                if typed.is_null(i) {
5940                    scalars.push(Scalar::Null(NullKind::Null));
5941                } else {
5942                    scalars.push(Scalar::Int64(typed.value(i)));
5943                }
5944            }
5945        }
5946        ArrowDataType::Int32 => {
5947            let typed = arr
5948                .as_any()
5949                .downcast_ref::<arrow::array::Int32Array>()
5950                .ok_or_else(|| IoError::Parquet("expected Int32Array".into()))?;
5951            for i in 0..len {
5952                if typed.is_null(i) {
5953                    scalars.push(Scalar::Null(NullKind::Null));
5954                } else {
5955                    scalars.push(Scalar::Int64(i64::from(typed.value(i))));
5956                }
5957            }
5958        }
5959        ArrowDataType::Float64 => {
5960            let typed = arr
5961                .as_any()
5962                .downcast_ref::<Float64Array>()
5963                .ok_or_else(|| IoError::Parquet("expected Float64Array".into()))?;
5964            for i in 0..len {
5965                if typed.is_null(i) {
5966                    scalars.push(Scalar::Null(NullKind::NaN));
5967                } else {
5968                    scalars.push(Scalar::Float64(typed.value(i)));
5969                }
5970            }
5971        }
5972        ArrowDataType::Float32 => {
5973            let typed = arr
5974                .as_any()
5975                .downcast_ref::<arrow::array::Float32Array>()
5976                .ok_or_else(|| IoError::Parquet("expected Float32Array".into()))?;
5977            for i in 0..len {
5978                if typed.is_null(i) {
5979                    scalars.push(Scalar::Null(NullKind::NaN));
5980                } else {
5981                    scalars.push(Scalar::Float64(f64::from(typed.value(i))));
5982                }
5983            }
5984        }
5985        ArrowDataType::Boolean => {
5986            let typed = arr
5987                .as_any()
5988                .downcast_ref::<BooleanArray>()
5989                .ok_or_else(|| IoError::Parquet("expected BooleanArray".into()))?;
5990            for i in 0..len {
5991                if typed.is_null(i) {
5992                    scalars.push(Scalar::Null(NullKind::Null));
5993                } else {
5994                    scalars.push(Scalar::Bool(typed.value(i)));
5995                }
5996            }
5997        }
5998        ArrowDataType::Utf8 => {
5999            let typed = arr
6000                .as_any()
6001                .downcast_ref::<StringArray>()
6002                .ok_or_else(|| IoError::Parquet("expected StringArray".into()))?;
6003            for i in 0..len {
6004                if typed.is_null(i) {
6005                    scalars.push(Scalar::Null(NullKind::Null));
6006                } else {
6007                    scalars.push(Scalar::Utf8(typed.value(i).to_owned()));
6008                }
6009            }
6010        }
6011        ArrowDataType::LargeUtf8 => {
6012            let typed = arr
6013                .as_any()
6014                .downcast_ref::<arrow::array::LargeStringArray>()
6015                .ok_or_else(|| IoError::Parquet("expected LargeStringArray".into()))?;
6016            for i in 0..len {
6017                if typed.is_null(i) {
6018                    scalars.push(Scalar::Null(NullKind::Null));
6019                } else {
6020                    scalars.push(Scalar::Utf8(typed.value(i).to_owned()));
6021                }
6022            }
6023        }
6024        ArrowDataType::Date32 => {
6025            let typed = arr
6026                .as_any()
6027                .downcast_ref::<Date32Array>()
6028                .ok_or_else(|| IoError::Parquet("expected Date32Array".into()))?;
6029            for i in 0..len {
6030                if typed.is_null(i) {
6031                    scalars.push(Scalar::Null(NullKind::NaT));
6032                } else {
6033                    if let Some(date) = arrow::temporal_conversions::as_date::<
6034                        arrow::datatypes::Date32Type,
6035                    >(typed.value(i).into())
6036                    {
6037                        scalars.push(Scalar::Utf8(date.format("%Y-%m-%d").to_string()));
6038                    } else {
6039                        scalars.push(Scalar::Null(NullKind::NaT));
6040                    }
6041                }
6042            }
6043        }
6044        ArrowDataType::Date64 => {
6045            let typed = arr
6046                .as_any()
6047                .downcast_ref::<Date64Array>()
6048                .ok_or_else(|| IoError::Parquet("expected Date64Array".into()))?;
6049            for i in 0..len {
6050                if typed.is_null(i) {
6051                    scalars.push(Scalar::Null(NullKind::NaT));
6052                } else {
6053                    if let Some(dt) = arrow::temporal_conversions::as_datetime::<
6054                        arrow::datatypes::Date64Type,
6055                    >(typed.value(i))
6056                    {
6057                        scalars.push(Scalar::Utf8(dt.format("%Y-%m-%d").to_string()));
6058                    } else {
6059                        scalars.push(Scalar::Null(NullKind::NaT));
6060                    }
6061                }
6062            }
6063        }
6064        ArrowDataType::Timestamp(unit, _tz) => match unit {
6065            TimeUnit::Second => {
6066                let typed = arr
6067                    .as_any()
6068                    .downcast_ref::<TimestampSecondArray>()
6069                    .ok_or_else(|| IoError::Parquet("expected TimestampSecondArray".into()))?;
6070                for i in 0..len {
6071                    if typed.is_null(i) {
6072                        scalars.push(Scalar::Null(NullKind::NaT));
6073                    } else {
6074                        if let Some(dt) = arrow::temporal_conversions::as_datetime::<
6075                            arrow::datatypes::TimestampSecondType,
6076                        >(typed.value(i))
6077                        {
6078                            scalars.push(Scalar::Utf8(dt.format("%Y-%m-%d %H:%M:%S").to_string()));
6079                        } else {
6080                            scalars.push(Scalar::Null(NullKind::NaT));
6081                        }
6082                    }
6083                }
6084            }
6085            TimeUnit::Millisecond => {
6086                let typed = arr
6087                    .as_any()
6088                    .downcast_ref::<TimestampMillisecondArray>()
6089                    .ok_or_else(|| IoError::Parquet("expected TimestampMillisecondArray".into()))?;
6090                for i in 0..len {
6091                    if typed.is_null(i) {
6092                        scalars.push(Scalar::Null(NullKind::NaT));
6093                    } else {
6094                        if let Some(dt) = arrow::temporal_conversions::as_datetime::<
6095                            arrow::datatypes::TimestampMillisecondType,
6096                        >(typed.value(i))
6097                        {
6098                            scalars.push(Scalar::Utf8(dt.format("%Y-%m-%d %H:%M:%S").to_string()));
6099                        } else {
6100                            scalars.push(Scalar::Null(NullKind::NaT));
6101                        }
6102                    }
6103                }
6104            }
6105            TimeUnit::Microsecond => {
6106                let typed = arr
6107                    .as_any()
6108                    .downcast_ref::<TimestampMicrosecondArray>()
6109                    .ok_or_else(|| IoError::Parquet("expected TimestampMicrosecondArray".into()))?;
6110                for i in 0..len {
6111                    if typed.is_null(i) {
6112                        scalars.push(Scalar::Null(NullKind::NaT));
6113                    } else {
6114                        if let Some(dt) = arrow::temporal_conversions::as_datetime::<
6115                            arrow::datatypes::TimestampMicrosecondType,
6116                        >(typed.value(i))
6117                        {
6118                            scalars
6119                                .push(Scalar::Utf8(dt.format("%Y-%m-%d %H:%M:%S%.6f").to_string()));
6120                        } else {
6121                            scalars.push(Scalar::Null(NullKind::NaT));
6122                        }
6123                    }
6124                }
6125            }
6126            TimeUnit::Nanosecond => {
6127                let typed = arr
6128                    .as_any()
6129                    .downcast_ref::<TimestampNanosecondArray>()
6130                    .ok_or_else(|| IoError::Parquet("expected TimestampNanosecondArray".into()))?;
6131                for i in 0..len {
6132                    if typed.is_null(i) {
6133                        scalars.push(Scalar::Null(NullKind::NaT));
6134                    } else {
6135                        if let Some(dt) = arrow::temporal_conversions::as_datetime::<
6136                            arrow::datatypes::TimestampNanosecondType,
6137                        >(typed.value(i))
6138                        {
6139                            scalars
6140                                .push(Scalar::Utf8(dt.format("%Y-%m-%d %H:%M:%S%.9f").to_string()));
6141                        } else {
6142                            scalars.push(Scalar::Null(NullKind::NaT));
6143                        }
6144                    }
6145                }
6146            }
6147        },
6148        other => {
6149            return Err(IoError::Parquet(format!(
6150                "unsupported Arrow data type: {other:?}"
6151            )));
6152        }
6153    }
6154
6155    Ok(scalars)
6156}
6157
6158/// Write a DataFrame to an in-memory Parquet buffer.
6159pub fn write_parquet_bytes(frame: &DataFrame) -> Result<Vec<u8>, IoError> {
6160    let batch = dataframe_to_record_batch(frame)?;
6161    let mut buf = Vec::new();
6162    let mut writer = ArrowWriter::try_new(&mut buf, batch.schema(), None)
6163        .map_err(|e| IoError::Parquet(e.to_string()))?;
6164    writer
6165        .write(&batch)
6166        .map_err(|e| IoError::Parquet(e.to_string()))?;
6167    writer
6168        .close()
6169        .map_err(|e| IoError::Parquet(e.to_string()))?;
6170    Ok(buf)
6171}
6172
6173/// Read a DataFrame from in-memory Parquet bytes.
6174pub fn read_parquet_bytes(data: &[u8]) -> Result<DataFrame, IoError> {
6175    let b = bytes::Bytes::from(data.to_vec());
6176    let reader = ParquetRecordBatchReaderBuilder::try_new(b)
6177        .map_err(|e| IoError::Parquet(e.to_string()))?
6178        .build()
6179        .map_err(|e| IoError::Parquet(e.to_string()))?;
6180
6181    let mut all_frames: Vec<DataFrame> = Vec::new();
6182    for batch_result in reader {
6183        let batch: RecordBatch =
6184            batch_result.map_err(|e: arrow::error::ArrowError| IoError::Parquet(e.to_string()))?;
6185        all_frames.push(record_batch_to_dataframe(&batch)?);
6186    }
6187
6188    if all_frames.is_empty() {
6189        // Return empty DataFrame
6190        return Ok(DataFrame::new_with_column_order(
6191            Index::new(vec![]),
6192            BTreeMap::new(),
6193            vec![],
6194        )?);
6195    }
6196
6197    // For a single batch (common case), return directly
6198    if all_frames.len() == 1 {
6199        if let Some(frame) = all_frames.into_iter().next() {
6200            return Ok(frame);
6201        }
6202        return Err(IoError::Parquet(
6203            "parquet reader produced zero record batches".to_owned(),
6204        ));
6205    }
6206
6207    // Multiple batches: concatenate via fp_frame::concat_dataframes
6208    let refs: Vec<&DataFrame> = all_frames.iter().collect();
6209    fp_frame::concat_dataframes(&refs).map_err(IoError::from)
6210}
6211
6212/// Write a DataFrame to a Parquet file.
6213pub fn write_parquet(frame: &DataFrame, path: &Path) -> Result<(), IoError> {
6214    let bytes = write_parquet_bytes(frame)?;
6215    std::fs::write(path, bytes)?;
6216    Ok(())
6217}
6218
6219/// Read a DataFrame from a Parquet file.
6220pub fn read_parquet(path: &Path) -> Result<DataFrame, IoError> {
6221    let data = std::fs::read(path)?;
6222    read_parquet_bytes(&data)
6223}
6224
6225// ── ORC I/O ────────────────────────────────────────────────────────────────
6226
6227/// Write a DataFrame to an in-memory ORC buffer.
6228///
6229/// Uses the shared Arrow conversion path, then delegates ORC physical encoding
6230/// to `orc-rust`.
6231pub fn write_orc_bytes(frame: &DataFrame) -> Result<Vec<u8>, IoError> {
6232    let batch = dataframe_to_record_batch(frame)?;
6233    let mut buf = Vec::new();
6234    let mut writer = OrcArrowWriterBuilder::new(&mut buf, batch.schema())
6235        .try_build()
6236        .map_err(|err| IoError::Orc(err.to_string()))?;
6237    writer
6238        .write(&batch)
6239        .map_err(|err| IoError::Orc(err.to_string()))?;
6240    writer
6241        .close()
6242        .map_err(|err| IoError::Orc(err.to_string()))?;
6243    Ok(buf)
6244}
6245
6246/// Read a DataFrame from in-memory ORC bytes.
6247pub fn read_orc_bytes(data: &[u8]) -> Result<DataFrame, IoError> {
6248    let bytes = bytes::Bytes::from(data.to_vec());
6249    let reader = OrcArrowReaderBuilder::try_new(bytes)
6250        .map_err(|err| IoError::Orc(err.to_string()))?
6251        .build();
6252
6253    let mut all_frames: Vec<DataFrame> = Vec::new();
6254    for batch_result in reader {
6255        let batch = batch_result.map_err(|err| IoError::Orc(err.to_string()))?;
6256        all_frames.push(record_batch_to_dataframe(&batch)?);
6257    }
6258
6259    if all_frames.is_empty() {
6260        return Ok(DataFrame::new_with_column_order(
6261            Index::new(vec![]),
6262            BTreeMap::new(),
6263            vec![],
6264        )?);
6265    }
6266
6267    if all_frames.len() == 1 {
6268        if let Some(frame) = all_frames.into_iter().next() {
6269            return Ok(frame);
6270        }
6271        return Err(IoError::Orc(
6272            "orc reader produced zero record batches".to_owned(),
6273        ));
6274    }
6275
6276    let refs: Vec<&DataFrame> = all_frames.iter().collect();
6277    fp_frame::concat_dataframes(&refs).map_err(IoError::from)
6278}
6279
6280/// Write a DataFrame to an ORC file.
6281pub fn write_orc(frame: &DataFrame, path: &Path) -> Result<(), IoError> {
6282    let bytes = write_orc_bytes(frame)?;
6283    std::fs::write(path, bytes)?;
6284    Ok(())
6285}
6286
6287/// Read a DataFrame from an ORC file.
6288pub fn read_orc(path: &Path) -> Result<DataFrame, IoError> {
6289    let data = std::fs::read(path)?;
6290    read_orc_bytes(&data)
6291}
6292
6293// ── Excel (xlsx) I/O ────────────────────────────────────────────────────
6294
/// Per-call settings for the Excel readers.
///
/// The `Default` value reads the first sheet, treats the first row as the
/// header row, keeps every column, and skips no rows.
#[derive(Debug, Clone)]
pub struct ExcelReadOptions {
    /// Name of the sheet to read; `None` selects the workbook's first sheet.
    pub sheet_name: Option<String>,
    /// `true` when the first (non-skipped) row holds the column headers.
    pub has_headers: bool,
    /// Restrict the result to these column names; `None` keeps all columns.
    /// Matches pandas `usecols` for label-based selection.
    pub usecols: Option<Vec<String>>,
    /// Column names to use in place of the worksheet headers or the
    /// auto-generated `column_N` names. Matches pandas `names=...`.
    pub names: Option<Vec<String>>,
    /// Name of the column to use as the DataFrame index, if any.
    pub index_col: Option<String>,
    /// How many leading rows to drop before headers/data are read.
    pub skip_rows: usize,
}

impl Default for ExcelReadOptions {
    /// First sheet, header row present, no column filter, no renaming, no
    /// explicit index column, no skipped rows.
    fn default() -> Self {
        ExcelReadOptions {
            has_headers: true,
            skip_rows: 0,
            sheet_name: None,
            usecols: None,
            names: None,
            index_col: None,
        }
    }
}
6326
6327/// Convert a calamine `Data` cell value to a `Scalar`.
6328fn excel_cell_to_scalar(cell: &calamine::Data) -> Scalar {
6329    match cell {
6330        calamine::Data::Int(v) => Scalar::Int64(*v),
6331        calamine::Data::Float(v) => {
6332            if v.is_nan() {
6333                Scalar::Null(NullKind::NaN)
6334            } else if v.fract() == 0.0 && *v >= i64::MIN as f64 && *v <= i64::MAX as f64 {
6335                // Excel stores integers as floats; recover Int64 for whole numbers.
6336                Scalar::Int64(*v as i64)
6337            } else {
6338                Scalar::Float64(*v)
6339            }
6340        }
6341        calamine::Data::String(s) => {
6342            if s.is_empty() {
6343                Scalar::Null(NullKind::Null)
6344            } else {
6345                Scalar::Utf8(s.clone())
6346            }
6347        }
6348        calamine::Data::Bool(b) => Scalar::Bool(*b),
6349        calamine::Data::Empty => Scalar::Null(NullKind::Null),
6350        calamine::Data::DateTime(dt) => {
6351            // Convert ExcelDateTime to string representation for now.
6352            Scalar::Utf8(format!("{dt}"))
6353        }
6354        calamine::Data::DateTimeIso(s) => Scalar::Utf8(s.clone()),
6355        calamine::Data::DurationIso(s) => Scalar::Utf8(s.clone()),
6356        calamine::Data::Error(e) => Scalar::Utf8(format!("#ERROR:{e:?}")),
6357    }
6358}
6359
6360/// Convert a Scalar to an IndexLabel, handling float precision correctly.
6361fn scalar_to_index_label(scalar: Scalar) -> IndexLabel {
6362    match scalar {
6363        Scalar::Int64(v) => IndexLabel::Int64(v),
6364        Scalar::Utf8(s) => IndexLabel::Utf8(s),
6365        Scalar::Float64(v) if v.fract() == 0.0 && v >= i64::MIN as f64 && v <= i64::MAX as f64 => {
6366            IndexLabel::Int64(v as i64)
6367        }
6368        Scalar::Float64(v) => IndexLabel::Utf8(v.to_string()),
6369        Scalar::Bool(b) => IndexLabel::Utf8(if b { "True" } else { "False" }.to_string()),
6370        _ => IndexLabel::Utf8(String::new()),
6371    }
6372}
6373
6374fn infer_writer_emitted_default_excel_index_col(
6375    headers: &[String],
6376    header_generated: &[bool],
6377    columns: &[Vec<Scalar>],
6378    options: &ExcelReadOptions,
6379) -> Option<usize> {
6380    if !options.has_headers
6381        || options.index_col.is_some()
6382        || options.usecols.is_some()
6383        || options.names.is_some()
6384    {
6385        return None;
6386    }
6387
6388    if headers.first()?.as_str() != "column_0"
6389        || !header_generated.first().copied().unwrap_or(false)
6390    {
6391        return None;
6392    }
6393
6394    let first_col = columns.first()?;
6395    if first_col
6396        .iter()
6397        .enumerate()
6398        .all(|(idx, scalar)| matches!(scalar, Scalar::Int64(value) if *value == idx as i64))
6399    {
6400        Some(0)
6401    } else {
6402        None
6403    }
6404}
6405
6406/// Shared parsing logic for Excel data after extracting rows from a workbook.
6407fn parse_excel_rows(
6408    rows: Vec<Vec<calamine::Data>>,
6409    options: &ExcelReadOptions,
6410) -> Result<DataFrame, IoError> {
6411    if rows.is_empty() {
6412        return DataFrame::new(Index::new(Vec::new()), BTreeMap::new()).map_err(IoError::Frame);
6413    }
6414
6415    let resolve_names = |width: usize| -> Result<Option<Vec<String>>, IoError> {
6416        options.names.as_ref().map_or(Ok(None), |names| {
6417            if names.len() == width {
6418                Ok(Some(names.clone()))
6419            } else {
6420                Err(IoError::Excel(format!(
6421                    "expected {width} column names, got {}",
6422                    names.len()
6423                )))
6424            }
6425        })
6426    };
6427
6428    // Extract headers.
6429    let (headers, header_generated, data_rows) = if options.has_headers {
6430        let header_row = &rows[0];
6431        let header_width = header_row.len();
6432        let provided_names = resolve_names(header_width)?;
6433        let (headers, header_generated): (Vec<_>, Vec<_>) = if let Some(names) = provided_names {
6434            (names, vec![false; header_width])
6435        } else {
6436            let header_pairs: Vec<(String, bool)> = header_row
6437                .iter()
6438                .enumerate()
6439                .map(|(i, cell)| match cell {
6440                    calamine::Data::String(s) if !s.is_empty() => (s.clone(), false),
6441                    _ => (format!("column_{i}"), true),
6442                })
6443                .collect();
6444            header_pairs.into_iter().unzip()
6445        };
6446        (headers, header_generated, &rows[1..])
6447    } else {
6448        let ncols = rows.iter().map(Vec::len).max().unwrap_or(0);
6449        let provided_names = resolve_names(ncols)?;
6450        let (headers, header_generated) = if let Some(names) = provided_names {
6451            (names, vec![false; ncols])
6452        } else {
6453            let headers: Vec<String> = (0..ncols).map(|i| format!("column_{i}")).collect();
6454            let header_generated = vec![true; ncols];
6455            (headers, header_generated)
6456        };
6457        (headers, header_generated, rows.as_slice())
6458    };
6459    reject_duplicate_headers(&headers)?;
6460
6461    if let Some(ref usecols) = options.usecols {
6462        validate_usecols(&headers, usecols)?;
6463    }
6464
6465    let ncols = headers.len();
6466
6467    // Accumulate columns.
6468    let mut columns: Vec<Vec<Scalar>> = (0..ncols)
6469        .map(|_| Vec::with_capacity(data_rows.len()))
6470        .collect();
6471
6472    for row in data_rows {
6473        for (col_idx, col_vec) in columns.iter_mut().enumerate() {
6474            let cell = row.get(col_idx).unwrap_or(&calamine::Data::Empty);
6475            col_vec.push(excel_cell_to_scalar(cell));
6476        }
6477    }
6478
6479    let (headers, header_generated, columns) = if let Some(ref usecols) = options.usecols {
6480        let mut filtered_headers = Vec::new();
6481        let mut filtered_generated = Vec::new();
6482        let mut filtered_columns = Vec::new();
6483        for ((name, generated), values) in headers.into_iter().zip(header_generated).zip(columns) {
6484            if usecols.contains(&name) {
6485                filtered_headers.push(name);
6486                filtered_generated.push(generated);
6487                filtered_columns.push(values);
6488            }
6489        }
6490        (filtered_headers, filtered_generated, filtered_columns)
6491    } else {
6492        (headers, header_generated, columns)
6493    };
6494
6495    // Handle index_col if specified.
6496    let index_col_idx = if let Some(ref idx_name) = options.index_col {
6497        let pos = headers.iter().position(|h| h == idx_name);
6498        if pos.is_none() {
6499            return Err(IoError::MissingIndexColumn(idx_name.clone()));
6500        }
6501        pos
6502    } else {
6503        infer_writer_emitted_default_excel_index_col(&headers, &header_generated, &columns, options)
6504    };
6505
6506    let index_name = index_col_idx.and_then(|idx_pos| {
6507        if !header_generated[idx_pos] {
6508            Some(headers[idx_pos].clone())
6509        } else {
6510            None
6511        }
6512    });
6513
6514    let mut out_columns = BTreeMap::new();
6515    let mut column_order = Vec::new();
6516
6517    for (idx, (name, values)) in headers.into_iter().zip(columns).enumerate() {
6518        if Some(idx) == index_col_idx {
6519            continue; // skip index column from data columns
6520        }
6521        out_columns.insert(name.clone(), Column::from_values(values)?);
6522        column_order.push(name);
6523    }
6524
6525    let index = if let Some(idx_pos) = index_col_idx {
6526        let idx_labels: Vec<IndexLabel> = data_rows
6527            .iter()
6528            .map(|row| {
6529                let cell = row.get(idx_pos).unwrap_or(&calamine::Data::Empty);
6530                scalar_to_index_label(excel_cell_to_scalar(cell))
6531            })
6532            .collect();
6533        Index::new(idx_labels).set_names(index_name.as_deref())
6534    } else {
6535        Index::from_i64((0..data_rows.len() as i64).collect())
6536    };
6537
6538    Ok(DataFrame::new_with_column_order(
6539        index,
6540        out_columns,
6541        column_order,
6542    )?)
6543}
6544
6545/// Read an Excel (.xlsx/.xls/.xlsb/.ods) file into a DataFrame.
6546///
6547/// Matches `pd.read_excel(path)` for basic usage.
6548pub fn read_excel(path: &Path, options: &ExcelReadOptions) -> Result<DataFrame, IoError> {
6549    use calamine::{Reader, open_workbook_auto};
6550
6551    let mut workbook = open_workbook_auto(path)
6552        .map_err(|e| IoError::Excel(format!("cannot open workbook: {e}")))?;
6553
6554    let sheet_name = if let Some(ref name) = options.sheet_name {
6555        name.clone()
6556    } else {
6557        let names = workbook.sheet_names();
6558        if names.is_empty() {
6559            return Err(IoError::Excel("workbook contains no sheets".into()));
6560        }
6561        names[0].clone()
6562    };
6563
6564    let range = workbook
6565        .worksheet_range(&sheet_name)
6566        .map_err(|e| IoError::Excel(format!("cannot read sheet '{sheet_name}': {e}")))?;
6567
6568    let rows: Vec<Vec<calamine::Data>> = range
6569        .rows()
6570        .skip(options.skip_rows)
6571        .map(|r| r.to_vec())
6572        .collect();
6573
6574    parse_excel_rows(rows, options)
6575}
6576
6577pub fn read_excel_with_index_cols(
6578    path: &Path,
6579    options: &ExcelReadOptions,
6580    index_cols: &[&str],
6581) -> Result<DataFrame, IoError> {
6582    let frame = read_excel(path, options)?;
6583    promote_frame_index_columns(&frame, index_cols)
6584}
6585
6586/// Read Excel from in-memory bytes.
6587pub fn read_excel_bytes(data: &[u8], options: &ExcelReadOptions) -> Result<DataFrame, IoError> {
6588    use calamine::{Reader, open_workbook_auto_from_rs};
6589
6590    let cursor = std::io::Cursor::new(data);
6591    let mut workbook = open_workbook_auto_from_rs(cursor)
6592        .map_err(|e| IoError::Excel(format!("cannot open workbook from bytes: {e}")))?;
6593
6594    let sheet_name = if let Some(ref name) = options.sheet_name {
6595        name.clone()
6596    } else {
6597        let names = workbook.sheet_names();
6598        if names.is_empty() {
6599            return Err(IoError::Excel("workbook contains no sheets".into()));
6600        }
6601        names[0].clone()
6602    };
6603
6604    let range = workbook
6605        .worksheet_range(&sheet_name)
6606        .map_err(|e| IoError::Excel(format!("cannot read sheet '{sheet_name}': {e}")))?;
6607
6608    let rows: Vec<Vec<calamine::Data>> = range
6609        .rows()
6610        .skip(options.skip_rows)
6611        .map(|r| r.to_vec())
6612        .collect();
6613
6614    parse_excel_rows(rows, options)
6615}
6616
6617pub fn read_excel_bytes_with_index_cols(
6618    data: &[u8],
6619    options: &ExcelReadOptions,
6620    index_cols: &[&str],
6621) -> Result<DataFrame, IoError> {
6622    let frame = read_excel_bytes(data, options)?;
6623    promote_frame_index_columns(&frame, index_cols)
6624}
6625
6626/// Read multiple sheets from an Excel file.
6627///
6628/// Matches `pd.read_excel(path, sheet_name=[...])` when `sheet_name`
6629/// is a list of sheet names — pandas returns a dict
6630/// `{name: DataFrame}`. Pass `sheet_names=None` to read every sheet
6631/// in the workbook (pandas `sheet_name=None`).
6632///
6633/// The outer Excel reader options (`has_headers`, `index_col`,
6634/// `skip_rows`) are applied uniformly to each selected sheet. The
6635/// per-sheet `sheet_name` option on `options` is ignored here
6636/// because the explicit `sheet_names` argument drives selection.
6637/// Read multiple sheets preserving workbook iteration order.
6638///
6639/// Matches `pd.read_excel(sheet_name=None)` exactly — pandas returns
6640/// a `dict` and, since Python 3.7, dict iteration order matches
6641/// insertion order, which in turn matches workbook sheet position.
6642/// `BTreeMap` (used by `read_excel_sheets`) would alphabetize, so
6643/// this sibling returns `Vec<(String, DataFrame)>` to preserve order.
6644pub fn read_excel_sheets_ordered(
6645    path: &Path,
6646    sheet_names: Option<&[String]>,
6647    options: &ExcelReadOptions,
6648) -> Result<Vec<(String, DataFrame)>, IoError> {
6649    use calamine::{Reader, open_workbook_auto};
6650
6651    let mut workbook = open_workbook_auto(path)
6652        .map_err(|e| IoError::Excel(format!("cannot open workbook: {e}")))?;
6653    let available: Vec<String> = workbook.sheet_names();
6654    // Per br-frankenpandas-c9cb4: HashSet<&str> for O(1) membership;
6655    // was O(m × n) Vec::iter().any() per requested name.
6656    let available_set: HashSet<&str> = available.iter().map(String::as_str).collect();
6657    let selected: Vec<String> = match sheet_names {
6658        Some(names) => {
6659            for name in names {
6660                if !available_set.contains(name.as_str()) {
6661                    return Err(IoError::Excel(format!(
6662                        "workbook does not contain sheet {name:?}"
6663                    )));
6664                }
6665            }
6666            names.to_vec()
6667        }
6668        None => available.clone(),
6669    };
6670    if selected.is_empty() {
6671        return Err(IoError::Excel("no sheets selected".to_owned()));
6672    }
6673    let mut out = Vec::with_capacity(selected.len());
6674    for sheet in &selected {
6675        let range = workbook
6676            .worksheet_range(sheet)
6677            .map_err(|e| IoError::Excel(format!("cannot read sheet {sheet:?}: {e}")))?;
6678        let rows: Vec<Vec<calamine::Data>> = range
6679            .rows()
6680            .skip(options.skip_rows)
6681            .map(|r| r.to_vec())
6682            .collect();
6683        let frame = parse_excel_rows(rows, options)?;
6684        out.push((sheet.clone(), frame));
6685    }
6686    Ok(out)
6687}
6688
6689/// Byte-based counterpart to `read_excel_sheets_ordered`.
6690pub fn read_excel_sheets_ordered_bytes(
6691    data: &[u8],
6692    sheet_names: Option<&[String]>,
6693    options: &ExcelReadOptions,
6694) -> Result<Vec<(String, DataFrame)>, IoError> {
6695    use calamine::{Reader, open_workbook_auto_from_rs};
6696
6697    let cursor = std::io::Cursor::new(data);
6698    let mut workbook = open_workbook_auto_from_rs(cursor)
6699        .map_err(|e| IoError::Excel(format!("cannot open workbook from bytes: {e}")))?;
6700    let available: Vec<String> = workbook.sheet_names();
6701    // Per br-frankenpandas-c9cb4: HashSet<&str> for O(1) membership;
6702    // was O(m × n) Vec::iter().any() per requested name.
6703    let available_set: HashSet<&str> = available.iter().map(String::as_str).collect();
6704    let selected: Vec<String> = match sheet_names {
6705        Some(names) => {
6706            for name in names {
6707                if !available_set.contains(name.as_str()) {
6708                    return Err(IoError::Excel(format!(
6709                        "workbook does not contain sheet {name:?}"
6710                    )));
6711                }
6712            }
6713            names.to_vec()
6714        }
6715        None => available.clone(),
6716    };
6717    if selected.is_empty() {
6718        return Err(IoError::Excel("no sheets selected".to_owned()));
6719    }
6720    let mut out = Vec::with_capacity(selected.len());
6721    for sheet in &selected {
6722        let range = workbook
6723            .worksheet_range(sheet)
6724            .map_err(|e| IoError::Excel(format!("cannot read sheet {sheet:?}: {e}")))?;
6725        let rows: Vec<Vec<calamine::Data>> = range
6726            .rows()
6727            .skip(options.skip_rows)
6728            .map(|r| r.to_vec())
6729            .collect();
6730        let frame = parse_excel_rows(rows, options)?;
6731        out.push((sheet.clone(), frame));
6732    }
6733    Ok(out)
6734}
6735
6736pub fn read_excel_sheets(
6737    path: &Path,
6738    sheet_names: Option<&[String]>,
6739    options: &ExcelReadOptions,
6740) -> Result<BTreeMap<String, DataFrame>, IoError> {
6741    use calamine::{Reader, open_workbook_auto};
6742
6743    let mut workbook = open_workbook_auto(path)
6744        .map_err(|e| IoError::Excel(format!("cannot open workbook: {e}")))?;
6745    let available: Vec<String> = workbook.sheet_names();
6746    // Per br-frankenpandas-c9cb4: HashSet<&str> for O(1) membership;
6747    // was O(m × n) Vec::iter().any() per requested name.
6748    let available_set: HashSet<&str> = available.iter().map(String::as_str).collect();
6749    let selected: Vec<String> = match sheet_names {
6750        Some(names) => {
6751            for name in names {
6752                if !available_set.contains(name.as_str()) {
6753                    return Err(IoError::Excel(format!(
6754                        "workbook does not contain sheet {name:?}"
6755                    )));
6756                }
6757            }
6758            names.to_vec()
6759        }
6760        None => available.clone(),
6761    };
6762    if selected.is_empty() {
6763        return Err(IoError::Excel("no sheets selected".to_owned()));
6764    }
6765
6766    let mut out = BTreeMap::new();
6767    for sheet in &selected {
6768        let range = workbook
6769            .worksheet_range(sheet)
6770            .map_err(|e| IoError::Excel(format!("cannot read sheet {sheet:?}: {e}")))?;
6771        let rows: Vec<Vec<calamine::Data>> = range
6772            .rows()
6773            .skip(options.skip_rows)
6774            .map(|r| r.to_vec())
6775            .collect();
6776        let frame = parse_excel_rows(rows, options)?;
6777        out.insert(sheet.clone(), frame);
6778    }
6779    Ok(out)
6780}
6781
6782/// Read multiple sheets from Excel bytes.
6783///
6784/// Byte-based counterpart to `read_excel_sheets`.
6785pub fn read_excel_sheets_bytes(
6786    data: &[u8],
6787    sheet_names: Option<&[String]>,
6788    options: &ExcelReadOptions,
6789) -> Result<BTreeMap<String, DataFrame>, IoError> {
6790    use calamine::{Reader, open_workbook_auto_from_rs};
6791
6792    let cursor = std::io::Cursor::new(data);
6793    let mut workbook = open_workbook_auto_from_rs(cursor)
6794        .map_err(|e| IoError::Excel(format!("cannot open workbook from bytes: {e}")))?;
6795    let available: Vec<String> = workbook.sheet_names();
6796    // Per br-frankenpandas-c9cb4: HashSet<&str> for O(1) membership;
6797    // was O(m × n) Vec::iter().any() per requested name.
6798    let available_set: HashSet<&str> = available.iter().map(String::as_str).collect();
6799    let selected: Vec<String> = match sheet_names {
6800        Some(names) => {
6801            for name in names {
6802                if !available_set.contains(name.as_str()) {
6803                    return Err(IoError::Excel(format!(
6804                        "workbook does not contain sheet {name:?}"
6805                    )));
6806                }
6807            }
6808            names.to_vec()
6809        }
6810        None => available.clone(),
6811    };
6812    if selected.is_empty() {
6813        return Err(IoError::Excel("no sheets selected".to_owned()));
6814    }
6815
6816    let mut out = BTreeMap::new();
6817    for sheet in &selected {
6818        let range = workbook
6819            .worksheet_range(sheet)
6820            .map_err(|e| IoError::Excel(format!("cannot read sheet {sheet:?}: {e}")))?;
6821        let rows: Vec<Vec<calamine::Data>> = range
6822            .rows()
6823            .skip(options.skip_rows)
6824            .map(|r| r.to_vec())
6825            .collect();
6826        let frame = parse_excel_rows(rows, options)?;
6827        out.insert(sheet.clone(), frame);
6828    }
6829    Ok(out)
6830}
6831
6832/// Write a DataFrame to an Excel (.xlsx) file.
6833///
6834/// Matches `pd.DataFrame.to_excel(path)`.
6835pub fn write_excel(frame: &DataFrame, path: &Path) -> Result<(), IoError> {
6836    let bytes = write_excel_bytes(frame)?;
6837    std::fs::write(path, bytes)?;
6838    Ok(())
6839}
6840
6841fn write_excel_index_label(
6842    worksheet: &mut rust_xlsxwriter::Worksheet,
6843    excel_row: u32,
6844    excel_col: u16,
6845    label: &IndexLabel,
6846) -> Result<(), IoError> {
6847    match label {
6848        IndexLabel::Int64(v) => {
6849            worksheet
6850                .write_number(excel_row, excel_col, *v as f64)
6851                .map_err(|e| IoError::Excel(format!("write index int: {e}")))?;
6852        }
6853        IndexLabel::Utf8(s) => {
6854            worksheet
6855                .write_string(excel_row, excel_col, s.as_str())
6856                .map_err(|e| IoError::Excel(format!("write index string: {e}")))?;
6857        }
6858        IndexLabel::Timedelta64(v) => {
6859            if *v != Timedelta::NAT {
6860                worksheet
6861                    .write_string(excel_row, excel_col, Timedelta::format(*v))
6862                    .map_err(|e| IoError::Excel(format!("write index timedelta: {e}")))?;
6863            }
6864        }
6865        IndexLabel::Datetime64(v) => {
6866            if *v != i64::MIN {
6867                worksheet
6868                    .write_string(excel_row, excel_col, label.to_string())
6869                    .map_err(|e| IoError::Excel(format!("write index datetime: {e}")))?;
6870            }
6871        }
6872        // Missing labels leave the cell blank, like NAT timedelta/datetime
6873        // above (pandas writes an empty cell for a NaN index label).
6874        IndexLabel::Null(_) => {}
6875    }
6876    Ok(())
6877}
6878
6879fn write_excel_scalar(
6880    worksheet: &mut rust_xlsxwriter::Worksheet,
6881    excel_row: u32,
6882    excel_col: u16,
6883    scalar: &Scalar,
6884) -> Result<(), IoError> {
6885    match scalar {
6886        Scalar::Int64(v) => {
6887            worksheet
6888                .write_number(excel_row, excel_col, *v as f64)
6889                .map_err(|e| IoError::Excel(format!("write int: {e}")))?;
6890        }
6891        Scalar::Float64(v) if !v.is_nan() => {
6892            worksheet
6893                .write_number(excel_row, excel_col, *v)
6894                .map_err(|e| IoError::Excel(format!("write float: {e}")))?;
6895        }
6896        Scalar::Bool(b) => {
6897            worksheet
6898                .write_boolean(excel_row, excel_col, *b)
6899                .map_err(|e| IoError::Excel(format!("write bool: {e}")))?;
6900        }
6901        Scalar::Utf8(s) => {
6902            worksheet
6903                .write_string(excel_row, excel_col, s.as_str())
6904                .map_err(|e| IoError::Excel(format!("write string: {e}")))?;
6905        }
6906        Scalar::Timedelta64(v) => {
6907            if *v != Timedelta::NAT {
6908                worksheet
6909                    .write_string(excel_row, excel_col, Timedelta::format(*v))
6910                    .map_err(|e| IoError::Excel(format!("write timedelta: {e}")))?;
6911            }
6912        }
6913        Scalar::Datetime64(v) => {
6914            if *v != Timestamp::NAT {
6915                worksheet
6916                    .write_string(excel_row, excel_col, format_datetime_ns(*v))
6917                    .map_err(|e| IoError::Excel(format!("write datetime: {e}")))?;
6918            }
6919        }
6920        Scalar::Period(v) => {
6921            if *v != i64::MIN {
6922                worksheet
6923                    .write_string(excel_row, excel_col, format!("Period[{v}]"))
6924                    .map_err(|e| IoError::Excel(format!("write period: {e}")))?;
6925            }
6926        }
6927        Scalar::Interval(iv) => {
6928            worksheet
6929                .write_string(excel_row, excel_col, format!("{iv}"))
6930                .map_err(|e| IoError::Excel(format!("write interval: {e}")))?;
6931        }
6932        Scalar::Float64(_) | Scalar::Null(_) => {}
6933    }
6934    Ok(())
6935}
6936
6937/// Write a DataFrame to Excel (.xlsx) bytes in memory.
6938///
6939/// Matches pandas `DataFrame.to_excel()` default index behavior by emitting
6940/// the index as the first worksheet column.
6941pub fn write_excel_bytes(frame: &DataFrame) -> Result<Vec<u8>, IoError> {
6942    write_excel_bytes_with_options(frame, &ExcelWriteOptions::default())
6943}
6944
/// Options for serializing a DataFrame to Excel.
///
/// Mirrors the subset of `pd.DataFrame.to_excel` parameters that
/// don't depend on the workbook writer itself. The `Default` impl yields
/// `sheet_name = "Sheet1"`, `index = true`, `index_label = None` and
/// `header = true`.
#[derive(Debug, Clone)]
pub struct ExcelWriteOptions {
    /// Target sheet name. Default: `"Sheet1"` (rust_xlsxwriter default).
    pub sheet_name: String,
    /// Whether to write the row index as a leading column. Matches
    /// pandas `index=True|False`. Default: true.
    pub index: bool,
    /// Header label for the index column when `index=true`. When
    /// `None`, the frame's index name is used (falling back to an
    /// empty string). Matches pandas `index_label=...`. Only consulted
    /// when both `index` and `header` are true, since that is the only
    /// case in which an index header cell is written.
    pub index_label: Option<String>,
    /// Whether to emit the column-name header row. Matches pandas
    /// `header=True|False`. Default: true. When false, data rows start
    /// at the first worksheet row.
    pub header: bool,
}
6964
6965impl Default for ExcelWriteOptions {
6966    fn default() -> Self {
6967        Self {
6968            sheet_name: "Sheet1".to_string(),
6969            index: true,
6970            index_label: None,
6971            header: true,
6972        }
6973    }
6974}
6975
6976/// Serialize a DataFrame to Excel bytes with explicit options.
6977///
6978/// Matches `pd.DataFrame.to_excel(sheet_name, index, index_label,
6979/// header)` for the in-memory byte form. The default `ExcelWriteOptions`
6980/// reproduces the existing `write_excel_bytes` behavior (index=true,
6981/// sheet_name="Sheet1").
6982pub fn write_excel_bytes_with_options(
6983    frame: &DataFrame,
6984    options: &ExcelWriteOptions,
6985) -> Result<Vec<u8>, IoError> {
6986    if options.index && frame.row_multiindex().is_some() {
6987        let materialized = materialize_named_row_multiindex_columns(frame)?;
6988        let mut nested_options = options.clone();
6989        nested_options.index = false;
6990        nested_options.index_label = None;
6991        return write_excel_bytes_with_options(&materialized, &nested_options);
6992    }
6993
6994    use rust_xlsxwriter::Workbook;
6995
6996    let mut workbook = Workbook::new();
6997    let worksheet = workbook.add_worksheet();
6998    worksheet
6999        .set_name(options.sheet_name.as_str())
7000        .map_err(|e| IoError::Excel(format!("set sheet name: {e}")))?;
7001
7002    let col_names: Vec<String> = frame.column_names().into_iter().cloned().collect();
7003    let data_col_offset: u16 = if options.index { 1 } else { 0 };
7004
7005    // Header row (optional).
7006    if options.header {
7007        if options.index {
7008            let idx_header = options
7009                .index_label
7010                .as_deref()
7011                .unwrap_or_else(|| frame.index().name().unwrap_or(""));
7012            worksheet
7013                .write_string(0, 0, idx_header)
7014                .map_err(|e| IoError::Excel(format!("write index header: {e}")))?;
7015        }
7016        for (col_idx, name) in col_names.iter().enumerate() {
7017            worksheet
7018                .write_string(0, data_col_offset + col_idx as u16, name.as_str())
7019                .map_err(|e| IoError::Excel(format!("write header: {e}")))?;
7020        }
7021    }
7022
7023    // Data rows — when header=true the first data row lands at excel
7024    // row 1; when header=false data starts at row 0.
7025    let header_rows: u32 = if options.header { 1 } else { 0 };
7026    let nrows = frame.index().len();
7027    for row_idx in 0..nrows {
7028        let excel_row = row_idx as u32 + header_rows;
7029        if options.index
7030            && let Some(label) = frame.index().labels().get(row_idx)
7031        {
7032            write_excel_index_label(worksheet, excel_row, 0, label)?;
7033        }
7034        for (col_idx, name) in col_names.iter().enumerate() {
7035            if let Some(col) = frame.column(name)
7036                && let Some(scalar) = col.value(row_idx)
7037            {
7038                write_excel_scalar(
7039                    worksheet,
7040                    excel_row,
7041                    data_col_offset + col_idx as u16,
7042                    scalar,
7043                )?;
7044            }
7045        }
7046    }
7047
7048    let buf = workbook
7049        .save_to_buffer()
7050        .map_err(|e| IoError::Excel(format!("save workbook: {e}")))?;
7051
7052    Ok(buf)
7053}
7054
7055/// File-based counterpart to `write_excel_bytes_with_options`.
7056pub fn write_excel_with_options(
7057    frame: &DataFrame,
7058    path: &Path,
7059    options: &ExcelWriteOptions,
7060) -> Result<(), IoError> {
7061    let bytes = write_excel_bytes_with_options(frame, options)?;
7062    std::fs::write(path, bytes)?;
7063    Ok(())
7064}
7065
7066// ── Arrow IPC / Feather I/O ──────────────────────────────────────────────
7067
7068/// Write a DataFrame to Arrow IPC (Feather v2) bytes in memory.
7069///
7070/// Matches `pd.DataFrame.to_feather()`. Feather v2 is the Arrow IPC file format
7071/// — the fastest columnar interchange format, recommended by pandas over HDF5.
7072pub fn write_feather_bytes(frame: &DataFrame) -> Result<Vec<u8>, IoError> {
7073    use arrow::ipc::writer::FileWriter;
7074
7075    let batch = dataframe_to_record_batch(frame)?;
7076    let schema = batch.schema();
7077
7078    let mut buf = Vec::new();
7079    let mut writer =
7080        FileWriter::try_new(&mut buf, &schema).map_err(|e| IoError::Arrow(e.to_string()))?;
7081    writer
7082        .write(&batch)
7083        .map_err(|e| IoError::Arrow(e.to_string()))?;
7084    writer.finish().map_err(|e| IoError::Arrow(e.to_string()))?;
7085    Ok(buf)
7086}
7087
7088/// Read a DataFrame from Arrow IPC (Feather v2) bytes in memory.
7089///
7090/// Matches `pd.read_feather()`.
7091pub fn read_feather_bytes(data: &[u8]) -> Result<DataFrame, IoError> {
7092    use arrow::ipc::reader::FileReader;
7093
7094    let cursor = std::io::Cursor::new(data);
7095    let reader = FileReader::try_new(cursor, None).map_err(|e| IoError::Arrow(e.to_string()))?;
7096
7097    let mut all_frames: Vec<DataFrame> = Vec::new();
7098    for batch_result in reader {
7099        let batch = batch_result.map_err(|e| IoError::Arrow(e.to_string()))?;
7100        all_frames.push(record_batch_to_dataframe(&batch)?);
7101    }
7102
7103    if all_frames.is_empty() {
7104        return Ok(DataFrame::new_with_column_order(
7105            Index::new(vec![]),
7106            BTreeMap::new(),
7107            vec![],
7108        )?);
7109    }
7110
7111    if all_frames.len() == 1 {
7112        if let Some(frame) = all_frames.into_iter().next() {
7113            return Ok(frame);
7114        }
7115        return Err(IoError::Arrow(
7116            "feather reader produced zero record batches".to_owned(),
7117        ));
7118    }
7119
7120    let refs: Vec<&DataFrame> = all_frames.iter().collect();
7121    fp_frame::concat_dataframes(&refs).map_err(IoError::from)
7122}
7123
7124/// Write a DataFrame to an Arrow IPC (Feather v2) file.
7125///
7126/// Matches `pd.DataFrame.to_feather(path)`.
7127pub fn write_feather(frame: &DataFrame, path: &Path) -> Result<(), IoError> {
7128    let bytes = write_feather_bytes(frame)?;
7129    std::fs::write(path, bytes)?;
7130    Ok(())
7131}
7132
7133/// Read a DataFrame from an Arrow IPC (Feather v2) file.
7134///
7135/// Matches `pd.read_feather(path)`.
7136pub fn read_feather(path: &Path) -> Result<DataFrame, IoError> {
7137    let data = std::fs::read(path)?;
7138    read_feather_bytes(&data)
7139}
7140
7141/// Write a DataFrame to Arrow IPC stream bytes (streaming format, no random access).
7142///
7143/// Unlike Feather (file format), the stream format has no footer and supports
7144/// streaming reads without seeking. Used for inter-process communication.
7145pub fn write_ipc_stream_bytes(frame: &DataFrame) -> Result<Vec<u8>, IoError> {
7146    use arrow::ipc::writer::StreamWriter;
7147
7148    let batch = dataframe_to_record_batch(frame)?;
7149    let schema = batch.schema();
7150
7151    let mut buf = Vec::new();
7152    let mut writer =
7153        StreamWriter::try_new(&mut buf, &schema).map_err(|e| IoError::Arrow(e.to_string()))?;
7154    writer
7155        .write(&batch)
7156        .map_err(|e| IoError::Arrow(e.to_string()))?;
7157    writer.finish().map_err(|e| IoError::Arrow(e.to_string()))?;
7158    Ok(buf)
7159}
7160
7161/// Read a DataFrame from Arrow IPC stream bytes (streaming format).
7162pub fn read_ipc_stream_bytes(data: &[u8]) -> Result<DataFrame, IoError> {
7163    use arrow::ipc::reader::StreamReader;
7164
7165    let cursor = std::io::Cursor::new(data);
7166    let reader = StreamReader::try_new(cursor, None).map_err(|e| IoError::Arrow(e.to_string()))?;
7167
7168    let mut all_frames: Vec<DataFrame> = Vec::new();
7169    for batch_result in reader {
7170        let batch = batch_result.map_err(|e| IoError::Arrow(e.to_string()))?;
7171        all_frames.push(record_batch_to_dataframe(&batch)?);
7172    }
7173
7174    if all_frames.is_empty() {
7175        return Ok(DataFrame::new_with_column_order(
7176            Index::new(vec![]),
7177            BTreeMap::new(),
7178            vec![],
7179        )?);
7180    }
7181
7182    if all_frames.len() == 1 {
7183        if let Some(frame) = all_frames.into_iter().next() {
7184            return Ok(frame);
7185        }
7186        return Err(IoError::Arrow(
7187            "ipc stream reader produced zero record batches".to_owned(),
7188        ));
7189    }
7190
7191    let refs: Vec<&DataFrame> = all_frames.iter().collect();
7192    fp_frame::concat_dataframes(&refs).map_err(IoError::from)
7193}
7194
7195// ── SQL I/O ─────────────────────────────────────────────────────────────
7196
/// Policy applied by a SQL write when the target table already exists.
///
/// This is the type of the `if_exists` field on `SqlWriteOptions`, and
/// corresponds to the `if_exists=` argument of `pd.DataFrame.to_sql`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum SqlIfExists {
    /// Raise an error if the table already exists.
    Fail,
    /// Drop the table and recreate it.
    Replace,
    /// Insert new rows into the existing table.
    Append,
}
7207
/// Strategy for emitting INSERT statements during `write_sql`.
///
/// Matches `pd.DataFrame.to_sql(.., method=...)` shape:
/// `Single` (default) emits one `INSERT INTO t VALUES (?, ...)` per row,
/// reusing a prepared statement under a transaction. `Multi` builds a
/// single multi-row `INSERT INTO t VALUES (...), (...), ...` statement
/// per chunk, where chunk size is `max_param_count() / num_cols` (or
/// the whole frame when the backend reports no max). `Multi` typically
/// wins on backends with high per-statement overhead (PostgreSQL,
/// MySQL); SQLite is already fast under prepared-statement reuse so the
/// gap there is small.
///
/// Per br-frankenpandas-i0ml (fd90.19).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum SqlInsertMethod {
    /// One INSERT per row, prepared once and re-bound per row.
    ///
    /// This is the variant returned by `SqlInsertMethod::default()`.
    #[default]
    Single,
    /// Multi-row INSERT, chunked by the backend's parameter limit.
    Multi,
}
7229
/// Options for reading SQL query results into a DataFrame.
///
/// `SqlReadOptions::default()` leaves every optional field as `None` and
/// sets `coerce_float` to `true`.
#[derive(Debug, Clone)]
pub struct SqlReadOptions {
    /// Positional parameters to bind to `?` placeholders in the SQL query.
    pub params: Option<Vec<Scalar>>,
    /// Column names to coerce via pandas-style parse_dates handling.
    /// Currently supports explicit column-name selection.
    pub parse_dates: Option<Vec<String>>,
    /// Promote decimal-like SQL text result columns to Float64.
    ///
    /// This mirrors pandas' `coerce_float` behavior for backends that
    /// expose NUMERIC/DECIMAL/MONEY values through text. Columns
    /// containing any non-numeric strings are left unchanged.
    /// `SqlReadOptions::default()` enables it; set it to `false` to
    /// disable the promotion.
    pub coerce_float: bool,
    /// Per-column dtype override applied after row materialization.
    ///
    /// Matches `pd.read_sql(.., dtype={'col': 'float64'})`. Each entry casts
    /// the named column to the declared dtype using `fp_types::cast_scalar_owned`.
    /// Map entries for columns not present in the result are silently
    /// ignored (matches pandas). Columns also listed in `parse_dates` are
    /// skipped to avoid double-cast errors — parse_dates wins.
    ///
    /// Per br-frankenpandas-l9pt (fd90.11).
    pub dtype: Option<BTreeMap<String, DType>>,
    /// Optional schema namespace for `read_sql_table` lookups.
    ///
    /// Matches `pd.read_sql_table(table, con, schema=...)`. When the
    /// backend reports `supports_schemas() == true` (PostgreSQL, MySQL,
    /// MSSQL, etc.) and `schema` is `Some(s)`, the SELECT references
    /// `s.table` with each part quoted by `conn.quote_identifier(...)`.
    /// When the backend reports `supports_schemas() == false` (SQLite),
    /// any `Some(s)` here is rejected before query execution. Pandas /
    /// SQLAlchemy raises `NotImplementedError` on that surface; failing
    /// closed avoids silently reading an unqualified table from the wrong
    /// namespace.
    ///
    /// Per br-frankenpandas-u6zn (fd90.14).
    pub schema: Option<String>,
    /// Optional projection list for `read_sql_table` reads.
    ///
    /// Matches `pd.read_sql_table(table, con, columns=[...])`. When
    /// `Some(list)`, the emitted SELECT projects only those columns
    /// (and in that order) instead of `SELECT *`. `None` preserves
    /// `SELECT *`. An empty Vec is rejected with `IoError::Sql` —
    /// pandas raises ValueError there. Each entry is validated via
    /// the standard alphanumeric+underscore policy.
    ///
    /// Note: `read_sql` / `read_sql_query` ignore this field — it
    /// only takes effect on `read_sql_table*` paths, where
    /// frankenpandas builds the SELECT itself.
    ///
    /// Per br-frankenpandas-d3e9 (fd90.34).
    pub columns: Option<Vec<String>>,
    /// Optional column name to promote to the DataFrame index.
    ///
    /// Matches `pd.read_sql(.., index_col=...)` and
    /// `pd.read_sql_table(table, con, index_col=...)`. When
    /// `Some(name)`, after row materialization the named column is
    /// removed from the result and used as the DataFrame index. The
    /// column must exist in the read result (or in the projection
    /// when `columns` is also set). `None` preserves the default
    /// RangeIndex.
    ///
    /// Empty string is rejected with `IoError::Sql` to match
    /// pandas' ValueError. List-of-strings (for MultiIndex) is out
    /// of scope for this slice — single index only.
    ///
    /// Per br-frankenpandas-c1h9 (fd90.36).
    pub index_col: Option<String>,
}
7300
7301impl Default for SqlReadOptions {
7302    /// Per br-frankenpandas-o0x6 (fd90.41): pandas defaults
7303    /// `coerce_float=True` for `read_sql` / `read_sql_query` /
7304    /// `read_sql_table` so we follow suit. Other defaults are the
7305    /// natural empty / None values.
7306    fn default() -> Self {
7307        Self {
7308            params: None,
7309            parse_dates: None,
7310            coerce_float: true,
7311            dtype: None,
7312            schema: None,
7313            columns: None,
7314            index_col: None,
7315        }
7316    }
7317}
7318
/// Options for writing a DataFrame to SQL.
///
/// The fields are named after the corresponding keyword arguments of
/// `pd.DataFrame.to_sql` (`if_exists`, `index`, `index_label`, `schema`,
/// `dtype`, `method`, `chunksize`).
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct SqlWriteOptions {
    /// Behavior when the target table already exists.
    pub if_exists: SqlIfExists,
    /// Whether to materialize the DataFrame index as the leading SQL column.
    pub index: bool,
    /// Optional override for the emitted index column name.
    pub index_label: Option<String>,
    /// Optional schema namespace for CREATE TABLE / INSERT routing.
    ///
    /// Matches `pd.DataFrame.to_sql(table, con, schema=...)`. When the
    /// backend reports `supports_schemas() == true` and `schema` is
    /// `Some(s)`, generated CREATE TABLE / INSERT statements target
    /// `"s"."table"`. On backends that report `false` (SQLite),
    /// `Some(s)` is silently ignored — preserves SQLite users' existing
    /// option structs.
    ///
    /// Per br-frankenpandas-udn6 (fd90.15).
    pub schema: Option<String>,
    /// Per-column SQL-type override applied during CREATE TABLE.
    ///
    /// Matches `pd.DataFrame.to_sql(.., dtype={'amount': 'NUMERIC(10,2)'})`.
    /// Each entry's value is the literal SQL type string emitted in the
    /// column definition for that column. Map entries for columns not
    /// in the frame are silently ignored (matches pandas). Falls back
    /// to `conn.dtype_sql(DType)` when no override is present.
    ///
    /// Per br-frankenpandas-ev2s (fd90.18).
    pub dtype: Option<BTreeMap<String, String>>,
    /// INSERT-emission strategy.
    ///
    /// Default `Single` matches pandas' default: one INSERT per row,
    /// re-binding a prepared statement under a transaction. `Multi`
    /// switches to multi-row VALUES batched by `conn.max_param_count()`,
    /// matching `pd.to_sql(.., method='multi')`.
    ///
    /// Per br-frankenpandas-i0ml (fd90.19).
    pub method: SqlInsertMethod,
    /// Maximum rows per transaction-bounded INSERT chunk.
    ///
    /// Matches `pd.DataFrame.to_sql(.., chunksize=...)`. When `Some(n)`,
    /// the row emit loop batches into chunks of `n` rows, each routed
    /// through its own `insert_rows` call (which on transactional
    /// backends commits a fresh transaction per chunk). This caps WAL /
    /// journal size for huge frames where the default `Single`
    /// single-transaction mode would balloon. For `Multi` mode the
    /// effective per-chunk row count is `min(chunksize,
    /// max_param_count / num_cols)`. `None` preserves existing
    /// single-transaction semantics.
    ///
    /// `Some(0)` is rejected — pandas raises ValueError there too.
    ///
    /// Per br-frankenpandas-ls9z (fd90.33).
    pub chunksize: Option<usize>,
}
7375
/// Backend-agnostic in-memory representation of a SQL query result.
///
/// This is the value `SqlConnection::query` hands back to the SQL IO
/// layer.
#[derive(Debug, Clone, PartialEq)]
pub struct SqlQueryResult {
    /// Names of the result columns.
    // NOTE(review): presumably in the order the backend reported them — confirm.
    pub columns: Vec<String>,
    /// Result values, one inner vector per row.
    // NOTE(review): presumably each row holds one `Scalar` per entry in
    // `columns` — verify against the backend implementations.
    pub rows: Vec<Vec<Scalar>>,
}
7382
/// Optional dtype hint for each result column; `None` means "no hint
/// available for this column".
type SqlColumnDtypeHints = Vec<Option<DType>>;
/// A query result as a `(headers, values, dtype hints)` triple.
// NOTE(review): the `Vec<Vec<Scalar>>` appears to be column-major — the
// chunk iterator reads the row count from the length of the first inner
// vector and slices every inner vector by row range.
type SqlMaterializedColumns = (Vec<String>, Vec<Vec<Scalar>>, SqlColumnDtypeHints);
7385
/// Backend-neutral SQL column metadata.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct SqlColumnSchema {
    /// Column name; this is the value the `column(name)` lookups on
    /// `SqlTableSchema` / `SqlReflectedTable` compare against.
    pub name: String,
    /// Type string as declared for the column, when the backend reports one.
    pub declared_type: Option<String>,
    /// Whether the column accepts NULL.
    // NOTE(review): presumably the inverse of a NOT NULL constraint —
    // confirm against each backend's `table_schema` override.
    pub nullable: bool,
    /// Default value / expression of the column as text, when one is
    /// reported.
    pub default_value: Option<String>,
    /// Position of the column within the table's primary key.
    // NOTE(review): presumably `None` for columns outside the primary
    // key; the SQLite rule documented on `autoincrement` treats
    // `Some(0)` as the first key position — confirm the numbering base.
    pub primary_key_ordinal: Option<usize>,
    /// Column-level comment text, when the backend exposes one.
    ///
    /// Per br-frankenpandas-cfld (fd90.35). SQLite has no column-comment
    /// storage so the rusqlite override always emits `None`. PostgreSQL
    /// impls populate from `pg_catalog.col_description(...)` (the
    /// comment text itself lives in `pg_catalog.pg_description`);
    /// MySQL uses `information_schema.columns.column_comment`; MSSQL
    /// reads from `sys.extended_properties`. Companion to the
    /// table-level `table_comment` (fd90.32) — together they round
    /// out SQLAlchemy.Inspector.get_columns() parity (its dict shape
    /// includes a `'comment'` key).
    pub comment: Option<String>,
    /// Whether the column is auto-incrementing.
    ///
    /// Per br-frankenpandas-bkl2 (fd90.37). Completes
    /// SQLAlchemy.Inspector.get_columns() parity (the dict shape
    /// includes an `'autoincrement'` key).
    ///
    /// SQLite detection rule (in the rusqlite `table_schema`
    /// override): true when `declared_type` is `INTEGER`
    /// (case-insensitive) AND the column is the sole primary key
    /// (`primary_key_ordinal == Some(0)`) — SQLite makes
    /// `INTEGER PRIMARY KEY` an alias for the auto-increment
    /// `rowid`. The optional `AUTOINCREMENT` keyword affects
    /// whether IDs are reused after delete but does not change the
    /// "is auto-incrementing" property pandas cares about.
    ///
    /// Other backends: PG SERIAL/BIGSERIAL/IDENTITY columns
    /// `true`; MySQL `AUTO_INCREMENT` modifier `true`; otherwise
    /// `false`.
    pub autoincrement: bool,
}
7425
/// Backend-neutral SQL table metadata.
///
/// This is the payload of `SqlConnection::table_schema`, which returns
/// `Ok(None)` instead when the table does not exist.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct SqlTableSchema {
    /// Name of the table this metadata describes.
    pub table_name: String,
    /// Column metadata; `column(name)` searches this list front to back.
    pub columns: Vec<SqlColumnSchema>,
}
7432
7433impl SqlTableSchema {
7434    pub fn column(&self, name: &str) -> Option<&SqlColumnSchema> {
7435        self.columns.iter().find(|column| column.name == name)
7436    }
7437}
7438
/// Backend-neutral SQL index metadata.
///
/// Per br-frankenpandas-bgv9 (fd90.28). Used by `list_indexes` /
/// `list_sql_indexes` to surface user-defined indexes so callers can
/// align with `SQLAlchemy.Inspector.get_indexes()` shape.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct SqlIndexSchema {
    /// Index name; `SqlReflectedTable::index` matches on this value.
    pub name: String,
    /// Names of the indexed columns.
    // NOTE(review): presumably in index key order — confirm against the
    // backend `list_indexes` overrides.
    pub columns: Vec<String>,
    /// Whether the index is a unique index.
    pub unique: bool,
}
7450
/// Backend-neutral SQL unique-constraint metadata.
///
/// Per br-frankenpandas-sh4v (fd90.31). Surfaces inline `UNIQUE`
/// declarations and `UNIQUE (...)` table constraints separately from
/// user-created `CREATE UNIQUE INDEX` (those land in
/// `SqlIndexSchema` via `list_indexes`). `name` may be backend-
/// generated (SQLite reports `sqlite_autoindex_<table>_<n>`) when
/// the constraint was declared inline without an explicit name —
/// we surface the backend's name verbatim rather than fabricating.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct SqlUniqueConstraintSchema {
    /// Constraint name, possibly backend-generated (see the type docs).
    pub name: String,
    /// Names of the columns covered by the constraint.
    pub columns: Vec<String>,
}
7465
/// Bundle of all introspection metadata for a single SQL table.
///
/// Per br-frankenpandas-76mw (fd90.40). Returned by
/// `SqlInspector::reflect_table` to give callers the full picture of
/// a table in one call instead of 5 separate trait dispatches.
/// Mirrors the bundled view that SQLAlchemy's
/// `Inspector.reflect_table` assembles when it populates a `Table`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct SqlReflectedTable {
    /// Name of the reflected table.
    pub table_name: String,
    /// Column metadata; searched by `column(name)`.
    pub columns: Vec<SqlColumnSchema>,
    /// Names of the columns that make up the primary key.
    // NOTE(review): presumably listed in key order — confirm against
    // `SqlInspector::reflect_table`.
    pub primary_key_columns: Vec<String>,
    /// Index metadata; searched by `index(name)` and `indexes_for_column`.
    pub indexes: Vec<SqlIndexSchema>,
    /// Foreign-key metadata; searched by `foreign_keys_for_column`.
    pub foreign_keys: Vec<SqlForeignKeySchema>,
    /// UNIQUE-constraint metadata; searched by `unique_constraint(name)`
    /// and `unique_constraints_for_column`.
    pub unique_constraints: Vec<SqlUniqueConstraintSchema>,
    /// Table-level comment text, when one is available.
    pub comment: Option<String>,
}
7483
7484impl SqlReflectedTable {
7485    /// Look up a column by name. Mirrors `SqlTableSchema::column`.
7486    ///
7487    /// Per br-frankenpandas-63ac (fd90.51).
7488    #[must_use]
7489    pub fn column(&self, name: &str) -> Option<&SqlColumnSchema> {
7490        self.columns.iter().find(|c| c.name == name)
7491    }
7492
7493    /// Look up an index by name.
7494    ///
7495    /// Per br-frankenpandas-63ac (fd90.51).
7496    #[must_use]
7497    pub fn index(&self, name: &str) -> Option<&SqlIndexSchema> {
7498        self.indexes.iter().find(|i| i.name == name)
7499    }
7500
7501    /// Look up a unique constraint by name (backend-generated names
7502    /// like `sqlite_autoindex_*` count too — match what
7503    /// `list_unique_constraints` surfaced).
7504    ///
7505    /// Per br-frankenpandas-63ac (fd90.51).
7506    #[must_use]
7507    pub fn unique_constraint(&self, name: &str) -> Option<&SqlUniqueConstraintSchema> {
7508        self.unique_constraints.iter().find(|u| u.name == name)
7509    }
7510
7511    /// Find every foreign key whose `columns` slice contains `column_name`.
7512    ///
7513    /// A given column may participate in multiple FK constraints (e.g.
7514    /// the same column referenced by separate FKs to different parents
7515    /// — rare but valid SQL). Returns the matching FKs in their
7516    /// declaration order from PRAGMA foreign_key_list.
7517    ///
7518    /// Per br-frankenpandas-63ac (fd90.51).
7519    #[must_use]
7520    pub fn foreign_keys_for_column(&self, column_name: &str) -> Vec<&SqlForeignKeySchema> {
7521        self.foreign_keys
7522            .iter()
7523            .filter(|fk| fk.columns.iter().any(|c| c == column_name))
7524            .collect()
7525    }
7526
7527    /// Find every index whose `columns` slice contains `column_name`.
7528    ///
7529    /// Matches multi-column indexes where the column appears at any
7530    /// position (first, middle, last) — useful for answering "is this
7531    /// column indexed" rather than the more restrictive "is this
7532    /// column the leading entry of an index" question.
7533    ///
7534    /// Per br-frankenpandas-37uy (fd90.52).
7535    #[must_use]
7536    pub fn indexes_for_column(&self, column_name: &str) -> Vec<&SqlIndexSchema> {
7537        self.indexes
7538            .iter()
7539            .filter(|i| i.columns.iter().any(|c| c == column_name))
7540            .collect()
7541    }
7542
7543    /// Find every UNIQUE constraint whose `columns` slice contains
7544    /// `column_name`.
7545    ///
7546    /// Same any-position match semantics as `indexes_for_column` and
7547    /// `foreign_keys_for_column`.
7548    ///
7549    /// Per br-frankenpandas-37uy (fd90.52).
7550    #[must_use]
7551    pub fn unique_constraints_for_column(
7552        &self,
7553        column_name: &str,
7554    ) -> Vec<&SqlUniqueConstraintSchema> {
7555        self.unique_constraints
7556            .iter()
7557            .filter(|u| u.columns.iter().any(|c| c == column_name))
7558            .collect()
7559    }
7560}
7561
/// Backend-neutral SQL foreign-key constraint metadata.
///
/// Per br-frankenpandas-uht8 (fd90.29). Aligns with
/// `SQLAlchemy.Inspector.get_foreign_keys()` shape: a single FK
/// constraint may span multiple columns (composite FK), and
/// `columns[i]` references `referenced_columns[i]` on
/// `referenced_table`. `constraint_name` is `None` for SQLite
/// inline FKs declared without an explicit CONSTRAINT name (PRAGMA
/// foreign_key_list does not surface a name, so we return None
/// there rather than fabricating one).
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct SqlForeignKeySchema {
    /// Explicit constraint name, or `None` when the backend reports none.
    pub constraint_name: Option<String>,
    /// Referencing (child-side) column names; position-aligned with
    /// `referenced_columns`.
    pub columns: Vec<String>,
    /// Name of the table the constraint points at.
    pub referenced_table: String,
    /// Referenced (parent-side) column names; position-aligned with
    /// `columns`.
    pub referenced_columns: Vec<String>,
}
7579
/// Iterator over DataFrame chunks produced by a SQL query.
///
/// Yields `Result<DataFrame, IoError>` items. The private `state`
/// decides how chunks are produced: by slicing an already-fetched
/// result held in memory, or by re-querying the connection one page at
/// a time.
pub struct SqlChunkIterator<'conn> {
    // Current iteration mode together with its cursor.
    state: SqlChunkIteratorState<'conn>,
}
7584
/// Internal cursor state behind `SqlChunkIterator`.
enum SqlChunkIteratorState<'conn> {
    /// The whole result is held in memory; each call to `next` slices
    /// rows `next_row..next_row + chunk_size` out of every column.
    Materialized {
        /// Column names handed to every emitted chunk.
        headers: Vec<String>,
        /// Column-major values: one inner vector per column. The row
        /// count is read from the length of the first column.
        columns: Vec<Vec<Scalar>>,
        /// Dtype hints, cloned into every emitted chunk.
        dtype_hints: SqlColumnDtypeHints,
        /// Maximum number of rows per emitted chunk.
        chunk_size: usize,
        /// Index of the first row that has not been emitted yet.
        next_row: usize,
    },
    /// Each chunk is fetched on demand through
    /// `sql_query_to_columns_paged`, passing `chunk_size` and
    /// `next_offset`.
    Paged {
        /// Connection used to fetch each page.
        conn: &'conn dyn SqlConnection,
        /// Query text as stored by the `paged` constructor (the output
        /// of `sql_trim_chunk_source`).
        query: String,
        /// Read options forwarded to every page fetch.
        options: SqlReadOptions,
        /// Result column names resolved when the iterator was built.
        headers: Vec<String>,
        /// Number of rows requested per page.
        chunk_size: usize,
        /// Number of rows already consumed; advanced by each page's
        /// row count.
        next_offset: usize,
        /// Set once a page comes back empty, short, or as an error;
        /// afterwards `next` returns `None`.
        finished: bool,
    },
}
7603
7604impl std::fmt::Debug for SqlChunkIterator<'_> {
7605    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
7606        match &self.state {
7607            SqlChunkIteratorState::Materialized {
7608                headers,
7609                columns,
7610                dtype_hints: _,
7611                chunk_size,
7612                next_row,
7613            } => f
7614                .debug_struct("SqlChunkIterator")
7615                .field("mode", &"materialized")
7616                .field("headers", headers)
7617                .field("row_count", &columns.first().map_or(0, Vec::len))
7618                .field("chunk_size", chunk_size)
7619                .field("next_row", next_row)
7620                .finish(),
7621            SqlChunkIteratorState::Paged {
7622                query,
7623                headers,
7624                chunk_size,
7625                next_offset,
7626                finished,
7627                ..
7628            } => f
7629                .debug_struct("SqlChunkIterator")
7630                .field("mode", &"paged")
7631                .field("query", query)
7632                .field("headers", headers)
7633                .field("chunk_size", chunk_size)
7634                .field("next_offset", next_offset)
7635                .field("finished", finished)
7636                .finish(),
7637        }
7638    }
7639}
7640
7641impl<'conn> SqlChunkIterator<'conn> {
7642    fn materialized(
7643        headers: Vec<String>,
7644        columns: Vec<Vec<Scalar>>,
7645        dtype_hints: SqlColumnDtypeHints,
7646        chunk_size: usize,
7647    ) -> Self {
7648        Self {
7649            state: SqlChunkIteratorState::Materialized {
7650                headers,
7651                columns,
7652                dtype_hints,
7653                chunk_size,
7654                next_row: 0,
7655            },
7656        }
7657    }
7658
7659    fn paged<C: SqlConnection + 'conn>(
7660        conn: &'conn C,
7661        query: &str,
7662        options: &SqlReadOptions,
7663        chunk_size: usize,
7664    ) -> Result<Self, IoError> {
7665        let headers = sql_paged_query_headers(conn, query, options)?;
7666        Ok(Self {
7667            state: SqlChunkIteratorState::Paged {
7668                conn,
7669                query: sql_trim_chunk_source(query)?.to_owned(),
7670                options: options.clone(),
7671                headers,
7672                chunk_size,
7673                next_offset: 0,
7674                finished: false,
7675            },
7676        })
7677    }
7678
7679    fn headers(&self) -> &[String] {
7680        match &self.state {
7681            SqlChunkIteratorState::Materialized { headers, .. }
7682            | SqlChunkIteratorState::Paged { headers, .. } => headers,
7683        }
7684    }
7685}
7686
7687impl Iterator for SqlChunkIterator<'_> {
7688    type Item = Result<DataFrame, IoError>;
7689
7690    fn next(&mut self) -> Option<Self::Item> {
7691        match &mut self.state {
7692            SqlChunkIteratorState::Materialized {
7693                headers,
7694                columns,
7695                dtype_hints,
7696                chunk_size,
7697                next_row,
7698            } => {
7699                let row_count = columns.first().map_or(0, Vec::len);
7700                if *next_row >= row_count {
7701                    return None;
7702                }
7703
7704                let start = *next_row;
7705                let end = start.saturating_add(*chunk_size).min(row_count);
7706                *next_row = end;
7707
7708                let chunk_columns = columns
7709                    .iter()
7710                    .map(|column| column[start..end].to_vec())
7711                    .collect();
7712                Some(dataframe_from_sql_columns(
7713                    headers.clone(),
7714                    chunk_columns,
7715                    dtype_hints.clone(),
7716                ))
7717            }
7718            SqlChunkIteratorState::Paged {
7719                conn,
7720                query,
7721                options,
7722                chunk_size,
7723                next_offset,
7724                finished,
7725                ..
7726            } => {
7727                if *finished {
7728                    return None;
7729                }
7730
7731                let page =
7732                    sql_query_to_columns_paged(*conn, query, options, *chunk_size, *next_offset);
7733                Some(match page {
7734                    Ok((headers, columns, dtype_hints)) => {
7735                        let row_count = columns.first().map_or(0, Vec::len);
7736                        if row_count == 0 {
7737                            *finished = true;
7738                            return None;
7739                        }
7740                        if row_count < *chunk_size {
7741                            *finished = true;
7742                        }
7743                        *next_offset = next_offset.saturating_add(row_count);
7744                        dataframe_from_sql_columns(headers, columns, dtype_hints)
7745                    }
7746                    Err(err) => {
7747                        *finished = true;
7748                        Err(err)
7749                    }
7750                })
7751            }
7752        }
7753    }
7754}
7755
/// Iterator over SQL DataFrame chunks with optional per-chunk index promotion.
///
/// Wraps a `SqlChunkIterator`; when `index_col` is set, every
/// successfully produced chunk is passed through `apply_sql_index_col`
/// before being yielded.
pub struct SqlIndexedChunkIterator<'conn> {
    // Underlying chunk source.
    inner: SqlChunkIterator<'conn>,
    // Column to promote to the index on each chunk; `None` yields the
    // chunks unchanged.
    index_col: Option<String>,
}
7761
7762impl std::fmt::Debug for SqlIndexedChunkIterator<'_> {
7763    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
7764        f.debug_struct("SqlIndexedChunkIterator")
7765            .field("inner", &self.inner)
7766            .field("index_col", &self.index_col)
7767            .finish()
7768    }
7769}
7770
7771impl Iterator for SqlIndexedChunkIterator<'_> {
7772    type Item = Result<DataFrame, IoError>;
7773
7774    fn next(&mut self) -> Option<Self::Item> {
7775        let chunk = self.inner.next()?;
7776        Some(match (chunk, self.index_col.as_deref()) {
7777            (Ok(frame), Some(index_col)) => apply_sql_index_col(frame, Some(index_col)),
7778            (Ok(frame), None) => Ok(frame),
7779            (Err(err), _) => Err(err),
7780        })
7781    }
7782}
7783
7784fn sql_indexed_chunks<'conn>(
7785    inner: SqlChunkIterator<'conn>,
7786    index_col: Option<&str>,
7787) -> Result<SqlIndexedChunkIterator<'conn>, IoError> {
7788    if let Some(col_name) = index_col {
7789        if col_name.is_empty() {
7790            return Err(IoError::Sql(
7791                "index_col: empty string is not a valid column name".to_owned(),
7792            ));
7793        }
7794        if !inner.headers().iter().any(|header| header == col_name) {
7795            return Err(IoError::Sql(format!(
7796                "index_col {col_name:?} not present in result columns"
7797            )));
7798        }
7799    }
7800    Ok(SqlIndexedChunkIterator {
7801        inner,
7802        index_col: index_col.map(str::to_owned),
7803    })
7804}
7805
7806/// Minimal SQL connection surface needed by FrankenPandas SQL IO.
7807pub trait SqlConnection {
7808    fn query(&self, query: &str, params: &[Scalar]) -> Result<SqlQueryResult, IoError>;
7809
7810    /// Return optional dtype hints for each result column in `query`.
7811    ///
7812    /// Backends that expose declared result-column types should override this
7813    /// so empty/all-null SQL results keep their table schema instead of
7814    /// falling back to `DType::Null`.
7815    fn query_column_dtypes(
7816        &self,
7817        _query: &str,
7818        _params: &[Scalar],
7819    ) -> Result<Vec<Option<DType>>, IoError> {
7820        Ok(Vec::new())
7821    }
7822
7823    /// Whether `read_sql_chunks*` may page this backend with a bounded
7824    /// `LIMIT`/`OFFSET` wrapper instead of materializing the whole result
7825    /// before the first chunk. Defaults to `false` so lightweight test
7826    /// doubles and custom backends keep the legacy behavior until they opt in.
7827    fn supports_paged_sql_chunks(&self) -> bool {
7828        false
7829    }
7830
7831    fn execute_batch(&self, sql: &str) -> Result<(), IoError>;
7832
7833    fn table_exists(&self, table_name: &str) -> Result<bool, IoError>;
7834
7835    fn insert_rows(&self, insert_sql: &str, rows: &[Vec<Scalar>]) -> Result<(), IoError>;
7836
7837    fn dtype_sql(&self, dtype: DType) -> &'static str;
7838
7839    fn index_dtype_sql(&self, index: &Index) -> &'static str;
7840
7841    /// Return the bind marker for the one-based parameter ordinal.
7842    ///
7843    /// SQLite and MySQL accept `?`; PostgreSQL-style backends use `$1`,
7844    /// `$2`, ... . Keeping marker generation on the backend trait lets
7845    /// write_sql stay generic without leaking backend dialect branches.
7846    fn parameter_marker(&self, _ordinal: usize) -> String {
7847        "?".to_owned()
7848    }
7849
7850    // ── Backend-capability + dialect probes (br-frankenpandas-6dtf) ─────
7851    //
7852    // Default impls return conservative values so existing implementations
7853    // stay backwards-compatible; concrete backends override per their
7854    // engine. Phase 2 wires these probes into the read_sql / read_sql_table
7855    // dispatch path so per-backend SQL quirks (RETURNING, max param count,
7856    // transaction semantics) can fan out without leaking concrete types.
7857
7858    /// Short identifier for this backend's SQL dialect.
7859    ///
7860    /// Used by diagnostics + DISCREPANCIES.md routing. Backends should
7861    /// override with the canonical pandas/SQLAlchemy dialect name:
7862    /// `"sqlite"`, `"postgresql"`, `"mysql"`, `"mariadb"`, `"oracle"`, etc.
7863    /// Default `"unknown"` flags un-customized impls during reviews.
7864    fn dialect_name(&self) -> &'static str {
7865        "unknown"
7866    }
7867
7868    /// Whether this backend honors `INSERT ... RETURNING ...` natively.
7869    ///
7870    /// Drives the write_sql path's choice between RETURNING-based row
7871    /// retrieval and a follow-up SELECT. Default `false` is the
7872    /// conservative choice (forces follow-up SELECT path) until each
7873    /// backend opts in.
7874    fn supports_returning(&self) -> bool {
7875        false
7876    }
7877
7878    /// Hard upper bound on bound-parameter count per statement, if known.
7879    ///
7880    /// SQLite (3.32+): 32766. PostgreSQL: 65535. MySQL: 65535. Backends
7881    /// that don't surface a meaningful cap return `None`. The bulk-insert
7882    /// path uses this to chunk multi-row INSERTs so a single executemany
7883    /// stays under the backend's parameter ceiling.
7884    fn max_param_count(&self) -> Option<usize> {
7885        None
7886    }
7887
7888    /// Maximum identifier length supported by the backend, or `None`
7889    /// when no documented limit exists (or the limit is irrelevant for
7890    /// the deployment).
7891    ///
7892    /// Per br-frankenpandas-cs81 (fd90.26). Defaults to `None`. Known
7893    /// caps: PostgreSQL = 63, MySQL = 64, MSSQL = 128, Oracle = 30
7894    /// (pre-12.2) or 128 (12.2+), SQLite = no documented limit.
7895    /// Useful for to_sql validation when emitting auto-generated
7896    /// index/constraint/column names that could otherwise exceed
7897    /// backend limits and produce truncated or rejected DDL.
7898    fn max_identifier_length(&self) -> Option<usize> {
7899        None
7900    }
7901
7902    /// Run `f` inside a transaction. The default impl runs `f` without
7903    /// BEGIN/COMMIT — backends that support transactions should override
7904    /// to wrap in their native transaction primitive (rusqlite `BEGIN`,
7905    /// tokio-postgres `BEGIN`, mysql `START TRANSACTION`, ...). On `Err`
7906    /// from `f`, transactional backends roll back; on `Ok` they commit.
7907    ///
7908    /// The default impl is intentionally a no-op so non-transactional
7909    /// connection wrappers (e.g. test doubles) compile without
7910    /// implementation effort. Production backends MUST override.
7911    fn with_transaction<T, F>(&self, f: F) -> Result<T, IoError>
7912    where
7913        F: FnOnce(&Self) -> Result<T, IoError>,
7914        Self: Sized,
7915    {
7916        f(self)
7917    }
7918
7919    /// Quote a SQL identifier (table name, column name, schema name) for
7920    /// safe inclusion in a generated statement.
7921    ///
7922    /// Default impl: ANSI `"..."` form with embedded `"` doubled (matches
7923    /// SQLite + PostgreSQL). MySQL/MariaDB backends must override to
7924    /// produce `\`...\`` (backtick). MSSQL backends use `[...]`. Per
7925    /// br-frankenpandas-2y7w (fd90.10).
7926    ///
7927    /// NUL bytes in the identifier are rejected (security: prevents
7928    /// statement-injection via embedded null terminators in C-string
7929    /// driver layers).
7930    fn quote_identifier(&self, ident: &str) -> Result<String, IoError> {
7931        if ident.contains('\0') {
7932            return Err(IoError::Sql("invalid SQL identifier: NUL byte".to_owned()));
7933        }
7934        Ok(format!("\"{}\"", ident.replace('"', "\"\"")))
7935    }
7936
7937    /// Whether this backend exposes multi-schema namespacing.
7938    ///
7939    /// PostgreSQL / MySQL / MariaDB / MSSQL / Oracle: true. SQLite (one
7940    /// schema per file connection — though ATTACH adds named schemas as
7941    /// a special case): false. Drives the `pd.read_sql_table(.., schema=X)`
7942    /// path's choice between qualifying the table reference (`schema.table`)
7943    /// and a connection-level pre-SET. Per br-frankenpandas-6dk9 (fd90.13).
7944    fn supports_schemas(&self) -> bool {
7945        false
7946    }
7947
    /// Default schema for unqualified table references, if the backend
    /// has one.
    ///
    /// PostgreSQL: `'public'` (or whatever the connection's `search_path`
    /// resolves to first). MySQL: the database name passed to the
    /// connection URL. SQLite: `None` (single namespace). The `read_sql_table`
    /// dispatch uses this when the user passes `schema=None` to choose
    /// between a bare `SELECT * FROM "table"` and a schema-qualified form.
    /// Default `None` keeps behavior identical to today's SQLite-only path.
    fn default_schema(&self) -> Option<String> {
        None
    }
7960
7961    /// Schema-aware table-existence check.
7962    ///
7963    /// Per br-frankenpandas-70d1 (fd90.17). Default impl delegates to
7964    /// `table_exists(table)` and ignores the schema argument — matches
7965    /// single-namespace embedded backends like SQLite. Multi-schema
7966    /// backends (PostgreSQL, MySQL, MSSQL) override to scope the check
7967    /// to the requested schema, so write_sql's `Fail` branch correctly
7968    /// distinguishes `analytics.users` from `audit.users`. The schema
7969    /// arg passes through unchanged — backends MAY consult
7970    /// `default_schema()` for their own fallback logic when `schema`
7971    /// is `None` (per fd90.57: this fallback is NOT applied by the
7972    /// default impl or the SQLite override).
7973    fn table_exists_in_schema(
7974        &self,
7975        table_name: &str,
7976        _schema: Option<&str>,
7977    ) -> Result<bool, IoError> {
7978        self.table_exists(table_name)
7979    }
7980
7981    /// List user-visible table names, optionally scoped to `schema`.
7982    ///
7983    /// Per br-frankenpandas-vhq2 (fd90.20). Default impl returns an empty
7984    /// vector — backends that cannot introspect (or that haven't yet
7985    /// implemented this method) report "no tables visible" rather than
7986    /// raising. Multi-schema backends (PostgreSQL, MySQL, MSSQL) override
7987    /// to query their information_schema; embedded backends (SQLite)
7988    /// override to query their internal catalog and ignore `schema`.
7989    fn list_tables(&self, _schema: Option<&str>) -> Result<Vec<String>, IoError> {
7990        Ok(Vec::new())
7991    }
7992
7993    /// Introspect a table's column metadata, optionally schema-scoped.
7994    ///
7995    /// Per br-frankenpandas-w43q (fd90.21). Returns `Ok(None)` if the
7996    /// table does not exist. Default impl returns `Ok(None)` for
7997    /// backends that cannot introspect; rusqlite overrides to use
7998    /// `PRAGMA table_info`. Multi-schema backends override to query
7999    /// `information_schema.columns` filtered by `schema`. Schema arg is
8000    /// silently ignored when `supports_schemas() == false`.
8001    fn table_schema(
8002        &self,
8003        _table_name: &str,
8004        _schema: Option<&str>,
8005    ) -> Result<Option<SqlTableSchema>, IoError> {
8006        Ok(None)
8007    }
8008
8009    /// List user-visible schemas (PostgreSQL "schemas", MySQL "databases").
8010    ///
8011    /// Per br-frankenpandas-lxhi (fd90.22). Default impl returns an empty
8012    /// vector. Single-namespace backends (SQLite) return empty as well —
8013    /// they have no meaningful schema concept. Multi-schema backends
8014    /// (PostgreSQL, MySQL, MSSQL) override to query their catalog and
8015    /// filter out internal/system schemas (`pg_*`, `information_schema`,
8016    /// `mysql`, `performance_schema`, etc.) so user-visible schemas
8017    /// surface cleanly.
8018    fn list_schemas(&self) -> Result<Vec<String>, IoError> {
8019        Ok(Vec::new())
8020    }
8021
8022    /// Probe the backend server's version string.
8023    ///
8024    /// Per br-frankenpandas-e23k (fd90.24). Useful for dialect-version
8025    /// gating (INSERT ... RETURNING needs PG 8.2+ / SQLite 3.35.0+,
8026    /// JSON operators need MySQL 5.7.8+, etc.) and for diagnostics.
8027    /// Default impl returns `Ok(None)` so backends that can't probe
8028    /// (or that haven't yet implemented this) report "unknown" rather
8029    /// than raising. rusqlite override returns the SQLite library
8030    /// version. PostgreSQL/MySQL impls should override with
8031    /// `SHOW server_version` / `SELECT VERSION()`.
8032    fn server_version(&self) -> Result<Option<String>, IoError> {
8033        Ok(None)
8034    }
8035
8036    /// List user-visible view names, optionally scoped to `schema`.
8037    ///
8038    /// Per br-frankenpandas-gm3r (fd90.30). Default impl returns an
8039    /// empty vector. rusqlite override queries `sqlite_master WHERE
8040    /// type='view'`, excluding internal `sqlite_*` views. Multi-schema
8041    /// backends override with `information_schema.views` filtered by
8042    /// the schema arg. Schema is silently ignored when
8043    /// `supports_schemas() == false`. Companion to `list_tables` —
8044    /// pandas/SQLAlchemy keep tables and views in distinct buckets so
8045    /// `pd.read_sql_table` can distinguish them.
8046    fn list_views(&self, _schema: Option<&str>) -> Result<Vec<String>, IoError> {
8047        Ok(Vec::new())
8048    }
8049
8050    /// List indexes defined on a table, optionally schema-scoped.
8051    ///
8052    /// Per br-frankenpandas-bgv9 (fd90.28). Default impl returns
8053    /// `Ok(Vec::new())` for backends that can't introspect. rusqlite
8054    /// override uses `PRAGMA index_list(table)` + `PRAGMA index_info`
8055    /// per index, surfacing only user-created indexes (the auto-created
8056    /// indexes for PRIMARY KEY constraints are filtered out to match
8057    /// SQLAlchemy.Inspector.get_indexes() semantics). Multi-schema
8058    /// backends override with information_schema queries.
8059    fn list_indexes(
8060        &self,
8061        _table_name: &str,
8062        _schema: Option<&str>,
8063    ) -> Result<Vec<SqlIndexSchema>, IoError> {
8064        Ok(Vec::new())
8065    }
8066
8067    /// List UNIQUE constraints declared on a table (inline or table-level),
8068    /// excluding `CREATE UNIQUE INDEX` indexes (those land in `list_indexes`).
8069    ///
8070    /// Per br-frankenpandas-sh4v (fd90.31). Default impl returns
8071    /// `Ok(Vec::new())`. rusqlite override uses
8072    /// `PRAGMA index_list(table)` filtered by `origin == 'u'` (the
8073    /// auto-created indexes that back declared UNIQUE constraints)
8074    /// then `PRAGMA index_info` per match. Multi-schema backends
8075    /// override with information_schema queries.
8076    fn list_unique_constraints(
8077        &self,
8078        _table_name: &str,
8079        _schema: Option<&str>,
8080    ) -> Result<Vec<SqlUniqueConstraintSchema>, IoError> {
8081        Ok(Vec::new())
8082    }
8083
8084    /// Probe the table-level comment, optionally schema-scoped.
8085    ///
8086    /// Per br-frankenpandas-yu3w (fd90.32). Default impl returns
8087    /// `Ok(None)` — SQLite has no native table-comment storage so it
8088    /// inherits the default. PostgreSQL impls should override using
8089    /// `pg_catalog.obj_description(...)` or
8090    /// `pg_catalog.pg_class.relkind` joined to `pg_description`;
8091    /// MySQL uses `information_schema.tables.table_comment`; MSSQL
8092    /// reads from `sys.extended_properties`. Aligns with
8093    /// `SQLAlchemy.Inspector.get_table_comment()` shape (returns
8094    /// `{'text': comment_or_none}`).
8095    fn table_comment(
8096        &self,
8097        _table_name: &str,
8098        _schema: Option<&str>,
8099    ) -> Result<Option<String>, IoError> {
8100        Ok(None)
8101    }
8102
8103    /// List foreign-key constraints declared on a table, optionally
8104    /// schema-scoped.
8105    ///
8106    /// Per br-frankenpandas-uht8 (fd90.29). Default impl returns
8107    /// `Ok(Vec::new())`. rusqlite override uses
8108    /// `PRAGMA foreign_key_list(table)`, grouping rows by their `id`
8109    /// column (each id is a single FK constraint that may span multiple
8110    /// columns) and ordering paired columns by `seq`. Multi-schema
8111    /// backends override with `information_schema.referential_constraints`
8112    /// + `key_column_usage` joined queries.
8113    fn list_foreign_keys(
8114        &self,
8115        _table_name: &str,
8116        _schema: Option<&str>,
8117    ) -> Result<Vec<SqlForeignKeySchema>, IoError> {
8118        Ok(Vec::new())
8119    }
8120
8121    /// Return the primary-key column names for a table, ordered by
8122    /// the `primary_key_ordinal` reported by `table_schema`.
8123    ///
8124    /// Per br-frankenpandas-uw3y (fd90.25). Default impl delegates to
8125    /// `table_schema(table, schema)` and pulls out columns whose
8126    /// `primary_key_ordinal` is `Some(_)`, sorted ascending. Returns
8127    /// an empty vector when:
8128    /// - the table doesn't exist (`table_schema` returns `Ok(None)`),
8129    /// - the table has no primary key,
8130    /// - the backend can't introspect (default `table_schema`).
8131    ///
8132    /// Useful for upsert conflict-target generation, `index_label`
8133    /// defaulting, and schema validation. Backends can override to
8134    /// query their catalog directly when `table_schema` is too heavy.
8135    fn primary_key_columns(
8136        &self,
8137        table_name: &str,
8138        schema: Option<&str>,
8139    ) -> Result<Vec<String>, IoError> {
8140        // Per fd90.47: defer to the shared primary_keys_from_schema
8141        // helper so the filter+sort logic lives in exactly one place
8142        // (the helper is also used by SqlInspector::reflect_table).
8143        let Some(meta) = self.table_schema(table_name, schema)? else {
8144            return Ok(Vec::new());
8145        };
8146        Ok(primary_keys_from_schema(&meta))
8147    }
8148
8149    /// Reset a table to empty without dropping its definition.
8150    ///
8151    /// Per br-frankenpandas-phum (fd90.23). Default impl emits
8152    /// `DELETE FROM <table>` — universal SQL that every backend
8153    /// supports, but slower than TRUNCATE on large tables because the
8154    /// row deletes are logged in the transaction journal. PostgreSQL
8155    /// and MySQL backends should override with `TRUNCATE TABLE`,
8156    /// which is dramatically faster (DDL-style fast-path) and resets
8157    /// auto-increment sequences. The schema arg routes through
8158    /// `quote_identifier` and is silently ignored when
8159    /// `supports_schemas() == false`.
8160    fn truncate_table(&self, table_name: &str, schema: Option<&str>) -> Result<(), IoError> {
8161        validate_sql_table_name(table_name)?;
8162        validate_sql_table_ref_identifier_lengths(self, table_name, schema)?;
8163        let qualified = match schema {
8164            Some(s) if self.supports_schemas() => {
8165                validate_sql_schema_name(s)?;
8166                format!(
8167                    "{}.{}",
8168                    self.quote_identifier(s)?,
8169                    self.quote_identifier(table_name)?
8170                )
8171            }
8172            _ => self.quote_identifier(table_name)?,
8173        };
8174        self.execute_batch(&format!("DELETE FROM {qualified}"))
8175    }
8176}
8177
/// Choose the SQLite column type declaration for a column of `dtype`.
///
/// Integer, boolean, and temporal dtypes all use `INTEGER` storage, floats
/// use `REAL`, and the remaining dtypes are declared as `TEXT`.
#[cfg(feature = "sql-sqlite")]
fn dtype_to_sql(dtype: DType) -> &'static str {
    match dtype {
        DType::Float64 => "REAL",
        DType::Int64
        | DType::Int64Nullable
        | DType::Bool
        | DType::BoolNullable
        | DType::Timedelta64 // stored as nanoseconds
        | DType::Datetime64 // stored as nanoseconds
        | DType::Period => "INTEGER", // periods are stored as ordinals
        DType::Utf8
        | DType::Categorical
        | DType::Null
        | DType::Interval // stored as string
        | DType::Sparse => "TEXT",
    }
}
8195
/// Infer a `DType` from a column's declared SQLite type, if one applies.
///
/// Matching is by case-insensitive substring on the trimmed declaration,
/// checked in this order: `INT` maps to `Int64`; `REAL` / `FLOA` / `DOUB`
/// map to `Float64`; `CHAR` / `CLOB` / `TEXT` map to `Utf8`. Any other
/// declaration yields `None`.
#[cfg(feature = "sql-sqlite")]
fn sqlite_decl_type_to_dtype(decl_type: &str) -> Option<DType> {
    let normalized = decl_type.trim().to_ascii_uppercase();
    let mentions_any =
        |needles: &[&str]| needles.iter().any(|needle| normalized.contains(*needle));

    if mentions_any(&["INT"]) {
        return Some(DType::Int64);
    }
    if mentions_any(&["REAL", "FLOA", "DOUB"]) {
        return Some(DType::Float64);
    }
    if mentions_any(&["CHAR", "CLOB", "TEXT"]) {
        return Some(DType::Utf8);
    }
    None
}
8209
/// Translate a value read from SQLite into a `Scalar`.
///
/// NULL, integer, real, and text values map onto the matching `Scalar`
/// variant. Blob contents are not carried over: a blob becomes a text
/// placeholder of the form `<blob:N bytes>`.
#[cfg(feature = "sql-sqlite")]
fn sql_value_to_scalar(value: &rusqlite::types::Value) -> Scalar {
    use rusqlite::types::Value;

    match value {
        Value::Null => Scalar::Null(NullKind::Null),
        Value::Integer(int) => Scalar::Int64(*int),
        Value::Real(real) => Scalar::Float64(*real),
        Value::Text(text) => Scalar::Utf8(text.clone()),
        Value::Blob(bytes) => Scalar::Utf8(format!("<blob:{} bytes>", bytes.len())),
    }
}
8221
/// Translate a `Scalar` into the SQLite value used to bind or store it.
///
/// Missing values become SQL NULL: explicit nulls, NaN floats, and the
/// NaT / `i64::MIN` sentinels of the temporal variants. Booleans are
/// written as the integers 1 / 0, temporal values as their raw integer,
/// and intervals as their `Display` text.
#[cfg(feature = "sql-sqlite")]
fn sql_value_from_scalar(scalar: &Scalar) -> rusqlite::types::Value {
    use rusqlite::types::Value;

    match scalar {
        Scalar::Null(_) => Value::Null,
        Scalar::Int64(int) => Value::Integer(*int),
        Scalar::Float64(float) if float.is_nan() => Value::Null,
        Scalar::Float64(float) => Value::Real(*float),
        Scalar::Bool(flag) => Value::Integer(i64::from(*flag)),
        Scalar::Utf8(text) => Value::Text(text.clone()),
        Scalar::Timedelta64(nanos) if *nanos == Timedelta::NAT => Value::Null,
        Scalar::Timedelta64(nanos) => Value::Integer(*nanos),
        Scalar::Datetime64(nanos) if *nanos == Timestamp::NAT => Value::Null,
        Scalar::Datetime64(nanos) => Value::Integer(*nanos),
        Scalar::Period(ordinal) if *ordinal == i64::MIN => Value::Null,
        Scalar::Period(ordinal) => Value::Integer(*ordinal),
        Scalar::Interval(interval) => Value::Text(interval.to_string()),
    }
}
8260
8261fn scalar_from_index_label(label: &IndexLabel) -> Scalar {
8262    match label {
8263        IndexLabel::Int64(v) => Scalar::Int64(*v),
8264        IndexLabel::Utf8(s) => Scalar::Utf8(s.clone()),
8265        // Typed-null label round-trips to the same-kind missing scalar.
8266        IndexLabel::Null(kind) => Scalar::Null(*kind),
8267        IndexLabel::Timedelta64(v) => {
8268            if *v == Timedelta::NAT {
8269                Scalar::Null(NullKind::Null)
8270            } else {
8271                Scalar::Timedelta64(*v)
8272            }
8273        }
8274        IndexLabel::Datetime64(v) => {
8275            if *v == i64::MIN {
8276                Scalar::Null(NullKind::Null)
8277            } else {
8278                Scalar::Utf8(format_datetime_ns(*v))
8279            }
8280        }
8281    }
8282}
8283
8284#[cfg(feature = "sql-sqlite")]
8285impl SqlConnection for rusqlite::Connection {
8286    fn query(&self, query: &str, params: &[Scalar]) -> Result<SqlQueryResult, IoError> {
8287        let mut stmt = self
8288            .prepare(query)
8289            .map_err(|e| IoError::Sql(format!("prepare failed: {e}")))?;
8290
8291        let col_count = stmt.column_count();
8292        let columns: Vec<String> = (0..col_count)
8293            .map(|i| stmt.column_name(i).unwrap_or("?").to_owned())
8294            .collect();
8295
8296        let sql_params = params.iter().map(sql_value_from_scalar).collect::<Vec<_>>();
8297        let mut rows = stmt
8298            .query(rusqlite::params_from_iter(sql_params.iter()))
8299            .map_err(|e| IoError::Sql(format!("query failed: {e}")))?;
8300
8301        let mut out_rows = Vec::new();
8302        while let Some(row) = rows
8303            .next()
8304            .map_err(|e| IoError::Sql(format!("row fetch failed: {e}")))?
8305        {
8306            let mut values = Vec::with_capacity(col_count);
8307            for col_idx in 0..col_count {
8308                let value: rusqlite::types::Value = row
8309                    .get(col_idx)
8310                    .map_err(|e| IoError::Sql(format!("cell read failed: {e}")))?;
8311                values.push(sql_value_to_scalar(&value));
8312            }
8313            out_rows.push(values);
8314        }
8315
8316        Ok(SqlQueryResult {
8317            columns,
8318            rows: out_rows,
8319        })
8320    }
8321
8322    fn query_column_dtypes(
8323        &self,
8324        query: &str,
8325        _params: &[Scalar],
8326    ) -> Result<Vec<Option<DType>>, IoError> {
8327        let stmt = self
8328            .prepare(query)
8329            .map_err(|e| IoError::Sql(format!("prepare failed: {e}")))?;
8330        Ok(stmt
8331            .columns()
8332            .into_iter()
8333            .map(|column| column.decl_type().and_then(sqlite_decl_type_to_dtype))
8334            .collect())
8335    }
8336
8337    fn supports_paged_sql_chunks(&self) -> bool {
8338        true
8339    }
8340
8341    fn execute_batch(&self, sql: &str) -> Result<(), IoError> {
8342        rusqlite::Connection::execute_batch(self, sql)
8343            .map_err(|e| IoError::Sql(format!("execute_batch failed: {e}")))
8344    }
8345
8346    fn table_exists(&self, table_name: &str) -> Result<bool, IoError> {
8347        self.prepare("SELECT 1 FROM sqlite_master WHERE type='table' AND name=?1")
8348            .and_then(|mut stmt| stmt.exists(rusqlite::params![table_name]))
8349            .map_err(|e| IoError::Sql(format!("existence check failed: {e}")))
8350    }
8351
8352    fn insert_rows(&self, insert_sql: &str, rows: &[Vec<Scalar>]) -> Result<(), IoError> {
8353        let tx = self
8354            .unchecked_transaction()
8355            .map_err(|e| IoError::Sql(format!("begin transaction failed: {e}")))?;
8356
8357        {
8358            let mut stmt = tx
8359                .prepare_cached(insert_sql)
8360                .map_err(|e| IoError::Sql(format!("prepare insert failed: {e}")))?;
8361
8362            for (row_idx, row_values) in rows.iter().enumerate() {
8363                let params = row_values
8364                    .iter()
8365                    .map(sql_value_from_scalar)
8366                    .collect::<Vec<_>>();
8367                stmt.execute(rusqlite::params_from_iter(params.iter()))
8368                    .map_err(|e| IoError::Sql(format!("insert row {row_idx} failed: {e}")))?;
8369            }
8370        }
8371
8372        tx.commit()
8373            .map_err(|e| IoError::Sql(format!("commit failed: {e}")))?;
8374        Ok(())
8375    }
8376
8377    fn dtype_sql(&self, dtype: DType) -> &'static str {
8378        dtype_to_sql(dtype)
8379    }
8380
8381    fn index_dtype_sql(&self, index: &Index) -> &'static str {
8382        sql_dtype_from_index(index)
8383    }
8384
8385    // br-frankenpandas-6dtf: backend-capability + dialect probes.
8386    fn dialect_name(&self) -> &'static str {
8387        "sqlite"
8388    }
8389
8390    fn supports_returning(&self) -> bool {
8391        // SQLite 3.35.0+ (released March 2021) supports INSERT ... RETURNING.
8392        // rusqlite ships with bundled SQLite >= 3.45 by default, so we can
8393        // unconditionally claim support here.
8394        true
8395    }
8396
8397    fn max_param_count(&self) -> Option<usize> {
8398        // SQLite default SQLITE_MAX_VARIABLE_NUMBER is 32766 since 3.32.0.
8399        // (Older builds capped at 999.) rusqlite bundled SQLite is current,
8400        // so this matches.
8401        Some(32766)
8402    }
8403
8404    fn with_transaction<T, F>(&self, f: F) -> Result<T, IoError>
8405    where
8406        F: FnOnce(&Self) -> Result<T, IoError>,
8407        Self: Sized,
8408    {
8409        struct RollbackOnDrop<'conn> {
8410            conn: &'conn rusqlite::Connection,
8411            active: bool,
8412        }
8413
8414        impl Drop for RollbackOnDrop<'_> {
8415            fn drop(&mut self) {
8416                if self.active {
8417                    let _ = rusqlite::Connection::execute_batch(self.conn, "ROLLBACK");
8418                }
8419            }
8420        }
8421
8422        // rusqlite's pure-trait `Self: Sized` constraint means we operate on
8423        // `&rusqlite::Connection` directly without taking the `&mut` that
8424        // `Connection::transaction()` requires. We emulate the same
8425        // BEGIN/COMMIT semantics with explicit pragmas. The guard keeps the
8426        // connection from retaining a write transaction if the callback
8427        // panics before we reach the explicit rollback/commit paths.
8428        self.execute_batch("BEGIN")
8429            .map_err(|e| IoError::Sql(format!("begin transaction failed: {e}")))?;
8430        let mut rollback = RollbackOnDrop {
8431            conn: self,
8432            active: true,
8433        };
8434        match f(self) {
8435            Ok(result) => {
8436                self.execute_batch("COMMIT")
8437                    .map_err(|e| IoError::Sql(format!("commit transaction failed: {e}")))?;
8438                rollback.active = false;
8439                Ok(result)
8440            }
8441            Err(err) => {
8442                // Best-effort rollback; surface the original error if rollback
8443                // also fails (rollback failure is logged via the Sql error
8444                // variant for diagnostics but the user wants the closure
8445                // error preserved as the primary signal).
8446                if self.execute_batch("ROLLBACK").is_ok() {
8447                    rollback.active = false;
8448                }
8449                Err(err)
8450            }
8451        }
8452    }
8453
8454    fn quote_identifier(&self, ident: &str) -> Result<String, IoError> {
8455        // SQLite accepts ANSI double-quotes for identifiers (it ALSO accepts
8456        // backticks for MySQL compat, but ANSI is the recommended form per
8457        // SQLite docs). Delegate to the existing free-fn helper to keep the
8458        // exact escaping policy in one place.
8459        quote_sql_ident(ident)
8460    }
8461
8462    fn list_tables(&self, _schema: Option<&str>) -> Result<Vec<String>, IoError> {
8463        // SQLite has a single namespace; the schema arg is silently
8464        // ignored to match `supports_schemas() == false`. We exclude
8465        // SQLite's internal `sqlite_*` book-keeping tables to match
8466        // pandas' SQLAlchemy dialect, which never surfaces them as
8467        // user tables.
8468        // Per fd90.50: ESCAPE '\' makes the `_` in 'sqlite\_%' a literal
8469        // underscore instead of a SQL LIKE single-char wildcard. Without
8470        // the escape, a user table named e.g. `sqliteX` would be
8471        // incorrectly excluded because the `_` matches any single char.
8472        let mut stmt = self
8473            .prepare(
8474                r"SELECT name FROM sqlite_master
8475                 WHERE type='table' AND name NOT LIKE 'sqlite\_%' ESCAPE '\'
8476                 ORDER BY name",
8477            )
8478            .map_err(|e| IoError::Sql(format!("list_tables prepare failed: {e}")))?;
8479        let names = stmt
8480            .query_map([], |row| row.get::<_, String>(0))
8481            .map_err(|e| IoError::Sql(format!("list_tables query failed: {e}")))?
8482            .collect::<Result<Vec<_>, _>>()
8483            .map_err(|e| IoError::Sql(format!("list_tables row read failed: {e}")))?;
8484        Ok(names)
8485    }
8486
8487    fn list_views(&self, _schema: Option<&str>) -> Result<Vec<String>, IoError> {
8488        // Same single-namespace policy as list_tables; type='view'
8489        // distinguishes the two buckets in sqlite_master.
8490        // Per fd90.50: same ESCAPE '\' fix as list_tables to treat
8491        // the underscore in 'sqlite_' as a literal.
8492        let mut stmt = self
8493            .prepare(
8494                r"SELECT name FROM sqlite_master
8495                 WHERE type='view' AND name NOT LIKE 'sqlite\_%' ESCAPE '\'
8496                 ORDER BY name",
8497            )
8498            .map_err(|e| IoError::Sql(format!("list_views prepare failed: {e}")))?;
8499        let names = stmt
8500            .query_map([], |row| row.get::<_, String>(0))
8501            .map_err(|e| IoError::Sql(format!("list_views query failed: {e}")))?
8502            .collect::<Result<Vec<_>, _>>()
8503            .map_err(|e| IoError::Sql(format!("list_views row read failed: {e}")))?;
8504        Ok(names)
8505    }
8506
8507    fn table_schema(
8508        &self,
8509        table_name: &str,
8510        _schema: Option<&str>,
8511    ) -> Result<Option<SqlTableSchema>, IoError> {
8512        // Validate the table name first — PRAGMA table_info doesn't
8513        // accept parameter binding, so we must reject anything that
8514        // could break out of the identifier slot.
8515        validate_sql_table_name(table_name)?;
8516        // PRAGMA table_info returns: cid, name, type, notnull, dflt_value, pk.
8517        let pragma = format!("PRAGMA table_info(\"{}\")", table_name.replace('"', "\"\""));
8518        let mut stmt = self
8519            .prepare(&pragma)
8520            .map_err(|e| IoError::Sql(format!("table_schema prepare failed: {e}")))?;
8521        // PRAGMA table_info row tuple: (name, type, notnull, dflt_value, pk).
8522        // Type alias keeps clippy::type_complexity happy on the
8523        // intermediate Vec used for the two-pass autoincrement detection.
8524        type ColumnInfoRow = (String, Option<String>, i64, Option<String>, i64);
8525        let raw_rows: Vec<ColumnInfoRow> = stmt
8526            .query_map([], |row| {
8527                Ok((
8528                    row.get::<_, String>(1)?,
8529                    row.get::<_, Option<String>>(2)?,
8530                    row.get::<_, i64>(3)?,
8531                    row.get::<_, Option<String>>(4)?,
8532                    row.get::<_, i64>(5)?,
8533                ))
8534            })
8535            .map_err(|e| IoError::Sql(format!("table_schema query failed: {e}")))?
8536            .collect::<Result<Vec<_>, _>>()
8537            .map_err(|e| IoError::Sql(format!("table_schema row read failed: {e}")))?;
8538
8539        // Per fd90.42 (refines fd90.37): SQLite's rowid-alias rule
8540        // requires the column to be the SOLE primary key — i.e. exactly
8541        // one row in PRAGMA table_info has pk > 0. Composite PKs (where
8542        // multiple columns have pk > 0) never qualify, even if the
8543        // first column is INTEGER. So we count single-PK status across
8544        // the table before deciding any column's autoincrement bit.
8545        let pk_count = raw_rows.iter().filter(|(_, _, _, _, pk)| *pk > 0).count();
8546        let single_pk = pk_count == 1;
8547
8548        let mut columns: Vec<SqlColumnSchema> = Vec::with_capacity(raw_rows.len());
8549        for (name, declared, notnull, dflt, pk) in raw_rows {
8550            let cleaned_type = declared.filter(|s| !s.is_empty());
8551            let autoincrement = single_pk
8552                && pk == 1
8553                && cleaned_type
8554                    .as_deref()
8555                    .map(|t| t.eq_ignore_ascii_case("INTEGER"))
8556                    .unwrap_or(false);
8557            columns.push(SqlColumnSchema {
8558                name,
8559                declared_type: cleaned_type,
8560                nullable: notnull == 0,
8561                default_value: dflt,
8562                primary_key_ordinal: if pk > 0 {
8563                    Some(usize::try_from(pk - 1).unwrap_or(0))
8564                } else {
8565                    None
8566                },
8567                comment: None,
8568                autoincrement,
8569            });
8570        }
8571        if columns.is_empty() {
8572            // PRAGMA table_info on a non-existent table returns 0 rows
8573            // without erroring; map that to None so callers can
8574            // distinguish missing tables from empty ones.
8575            Ok(None)
8576        } else {
8577            Ok(Some(SqlTableSchema {
8578                table_name: table_name.to_owned(),
8579                columns,
8580            }))
8581        }
8582    }
8583
8584    fn server_version(&self) -> Result<Option<String>, IoError> {
8585        // sqlite_version() is a built-in scalar that returns the
8586        // SQLite library version string (e.g. "3.45.1").
8587        let version: String = self
8588            .query_row("SELECT sqlite_version()", [], |row| row.get(0))
8589            .map_err(|e| IoError::Sql(format!("server_version query failed: {e}")))?;
8590        Ok(Some(version))
8591    }
8592
8593    fn list_indexes(
8594        &self,
8595        table_name: &str,
8596        _schema: Option<&str>,
8597    ) -> Result<Vec<SqlIndexSchema>, IoError> {
8598        validate_sql_table_name(table_name)?;
8599        // PRAGMA index_list(table) returns: seq, name, unique, origin, partial.
8600        // origin is 'c' for CREATE INDEX (user), 'pk' for PRIMARY KEY auto,
8601        // 'u' for UNIQUE constraint auto. SQLAlchemy.Inspector surfaces
8602        // only the user-created ones, so we filter out 'pk' to match.
8603        let pragma_list = format!("PRAGMA index_list(\"{}\")", table_name.replace('"', "\"\""));
8604        let mut list_stmt = self
8605            .prepare(&pragma_list)
8606            .map_err(|e| IoError::Sql(format!("list_indexes prepare failed: {e}")))?;
8607        let index_meta = list_stmt
8608            .query_map([], |row| {
8609                Ok((
8610                    row.get::<_, String>(1)?, // name
8611                    row.get::<_, i64>(2)?,    // unique flag
8612                    row.get::<_, String>(3)?, // origin
8613                ))
8614            })
8615            .map_err(|e| IoError::Sql(format!("list_indexes query failed: {e}")))?
8616            .collect::<Result<Vec<_>, _>>()
8617            .map_err(|e| IoError::Sql(format!("list_indexes row read failed: {e}")))?;
8618
8619        let mut indexes = Vec::new();
8620        for (name, uniq, origin) in index_meta {
8621            if origin == "pk" {
8622                // Auto-created PK index — pandas/SQLAlchemy hide it.
8623                continue;
8624            }
8625            if origin == "u" {
8626                // Auto-created index backing a declared UNIQUE
8627                // constraint — surfaced via list_unique_constraints
8628                // (fd90.31), not here, to match SQLAlchemy disjoint
8629                // bucketing between get_indexes and
8630                // get_unique_constraints.
8631                continue;
8632            }
8633            // PRAGMA index_info(idx) returns: seqno, cid, column_name (col2 may
8634            // be NULL for expression-based indexes — skip those rather than
8635            // surfacing partial column lists).
8636            let pragma_info = format!("PRAGMA index_info(\"{}\")", name.replace('"', "\"\""));
8637            let mut info_stmt = self
8638                .prepare(&pragma_info)
8639                .map_err(|e| IoError::Sql(format!("index_info prepare failed: {e}")))?;
8640            let cols = info_stmt
8641                .query_map([], |row| {
8642                    Ok((row.get::<_, i64>(0)?, row.get::<_, Option<String>>(2)?))
8643                })
8644                .map_err(|e| IoError::Sql(format!("index_info query failed: {e}")))?
8645                .collect::<Result<Vec<_>, _>>()
8646                .map_err(|e| IoError::Sql(format!("index_info row read failed: {e}")))?;
8647            // Skip expression-based indexes (any column_name is NULL).
8648            if cols.iter().any(|(_, c)| c.is_none()) {
8649                continue;
8650            }
8651            let mut sorted: Vec<(i64, String)> = cols
8652                .into_iter()
8653                .map(|(seq, c)| (seq, c.unwrap_or_default()))
8654                .collect();
8655            sorted.sort_by_key(|(seq, _)| *seq);
8656            indexes.push(SqlIndexSchema {
8657                name,
8658                columns: sorted.into_iter().map(|(_, c)| c).collect(),
8659                unique: uniq != 0,
8660            });
8661        }
8662        Ok(indexes)
8663    }
8664
8665    fn list_unique_constraints(
8666        &self,
8667        table_name: &str,
8668        _schema: Option<&str>,
8669    ) -> Result<Vec<SqlUniqueConstraintSchema>, IoError> {
8670        validate_sql_table_name(table_name)?;
8671        // PRAGMA index_list(table) origin column:
8672        //   'c' = CREATE INDEX (user) — surfaces via list_indexes
8673        //   'u' = UNIQUE constraint   — surfaces here
8674        //   'pk' = PRIMARY KEY auto   — surfaces via primary_key_columns
8675        let pragma_list = format!("PRAGMA index_list(\"{}\")", table_name.replace('"', "\"\""));
8676        let mut list_stmt = self
8677            .prepare(&pragma_list)
8678            .map_err(|e| IoError::Sql(format!("list_unique_constraints prepare failed: {e}")))?;
8679        let candidates = list_stmt
8680            .query_map([], |row| {
8681                Ok((
8682                    row.get::<_, String>(1)?, // index name
8683                    row.get::<_, String>(3)?, // origin
8684                ))
8685            })
8686            .map_err(|e| IoError::Sql(format!("list_unique_constraints query failed: {e}")))?
8687            .collect::<Result<Vec<_>, _>>()
8688            .map_err(|e| IoError::Sql(format!("list_unique_constraints row read failed: {e}")))?;
8689
8690        let mut constraints = Vec::new();
8691        for (name, origin) in candidates {
8692            if origin != "u" {
8693                continue;
8694            }
8695            let pragma_info = format!("PRAGMA index_info(\"{}\")", name.replace('"', "\"\""));
8696            let mut info_stmt = self
8697                .prepare(&pragma_info)
8698                .map_err(|e| IoError::Sql(format!("uq index_info prepare failed: {e}")))?;
8699            let cols = info_stmt
8700                .query_map([], |row| {
8701                    Ok((row.get::<_, i64>(0)?, row.get::<_, Option<String>>(2)?))
8702                })
8703                .map_err(|e| IoError::Sql(format!("uq index_info query failed: {e}")))?
8704                .collect::<Result<Vec<_>, _>>()
8705                .map_err(|e| IoError::Sql(format!("uq index_info row read failed: {e}")))?;
8706            // Skip expression-based unique constraints (column NULL).
8707            if cols.iter().any(|(_, c)| c.is_none()) {
8708                continue;
8709            }
8710            let mut sorted: Vec<(i64, String)> = cols
8711                .into_iter()
8712                .map(|(seq, c)| (seq, c.unwrap_or_default()))
8713                .collect();
8714            sorted.sort_by_key(|(seq, _)| *seq);
8715            constraints.push(SqlUniqueConstraintSchema {
8716                name,
8717                columns: sorted.into_iter().map(|(_, c)| c).collect(),
8718            });
8719        }
8720        Ok(constraints)
8721    }
8722
8723    fn list_foreign_keys(
8724        &self,
8725        table_name: &str,
8726        _schema: Option<&str>,
8727    ) -> Result<Vec<SqlForeignKeySchema>, IoError> {
8728        // PRAGMA foreign_key_list rows: (seq, referenced_table, from_col, to_col).
8729        // The constraint id is the BTreeMap key; we don't repeat it inside the
8730        // value tuple. Type alias keeps clippy::type_complexity happy and
8731        // the grouping logic readable.
8732        type FkRow = (i64, String, String, Option<String>);
8733
8734        validate_sql_table_name(table_name)?;
8735        // PRAGMA foreign_key_list(table) returns: id, seq, table, from, to,
8736        // on_update, on_delete, match. Each `id` is one FK constraint;
8737        // multiple rows with the same id describe a composite FK.
8738        let pragma = format!(
8739            "PRAGMA foreign_key_list(\"{}\")",
8740            table_name.replace('"', "\"\"")
8741        );
8742        let mut stmt = self
8743            .prepare(&pragma)
8744            .map_err(|e| IoError::Sql(format!("list_foreign_keys prepare failed: {e}")))?;
8745        let rows: Vec<(i64, FkRow)> = stmt
8746            .query_map([], |row| {
8747                Ok((
8748                    row.get::<_, i64>(0)?, // id
8749                    (
8750                        row.get::<_, i64>(1)?,            // seq
8751                        row.get::<_, String>(2)?,         // referenced table
8752                        row.get::<_, String>(3)?,         // from column
8753                        row.get::<_, Option<String>>(4)?, // to column (nullable)
8754                    ),
8755                ))
8756            })
8757            .map_err(|e| IoError::Sql(format!("list_foreign_keys query failed: {e}")))?
8758            .collect::<Result<Vec<_>, _>>()
8759            .map_err(|e| IoError::Sql(format!("list_foreign_keys row read failed: {e}")))?;
8760
8761        // Group by id; preserve discovery order across distinct ids.
8762        let mut order: Vec<i64> = Vec::new();
8763        let mut grouped: std::collections::BTreeMap<i64, Vec<FkRow>> =
8764            std::collections::BTreeMap::new();
8765        for (id, fk_row) in rows {
8766            let (seq, ref_table, from_col, to_col) = fk_row;
8767            if !grouped.contains_key(&id) {
8768                order.push(id);
8769            }
8770            grouped
8771                .entry(id)
8772                .or_default()
8773                .push((seq, ref_table, from_col, to_col));
8774        }
8775
8776        let mut fks = Vec::with_capacity(order.len());
8777        for id in order {
8778            let mut group = grouped.remove(&id).unwrap_or_default();
8779            group.sort_by_key(|(seq, _, _, _)| *seq);
8780            let ref_table = group
8781                .first()
8782                .map(|(_, t, _, _)| t.clone())
8783                .unwrap_or_default();
8784            let mut columns = Vec::with_capacity(group.len());
8785            let mut referenced_columns: Vec<Option<String>> = Vec::with_capacity(group.len());
8786            for (_, _, from_col, to_col) in group {
8787                columns.push(from_col);
8788                referenced_columns.push(to_col);
8789            }
8790            // Per fd90.44: when ALL `to` columns are NULL, the user
8791            // declared `FOREIGN KEY (cols) REFERENCES parent` (implicit
8792            // reference to parent's PK). Resolve by looking up the
8793            // parent's primary key columns. SQLAlchemy.Inspector
8794            // surfaces these as resolved-to-PK references; matching
8795            // that behavior keeps callers from missing real FKs.
8796            let resolved_columns: Vec<String> = if referenced_columns.iter().all(Option::is_none) {
8797                // Implicit-PK reference: look up parent's PK.
8798                let pk = self.primary_key_columns(&ref_table, None)?;
8799                if pk.len() == columns.len() {
8800                    pk
8801                } else {
8802                    // Parent PK shape doesn't match FK column count
8803                    // (parent has no PK, or composite mismatch). Skip
8804                    // — fabricating columns would mislead callers
8805                    // worse than hiding the FK.
8806                    continue;
8807                }
8808            } else if referenced_columns.iter().all(Option::is_some) {
8809                // Fully explicit: every column has a resolved 'to'.
8810                referenced_columns.into_iter().flatten().collect()
8811            } else {
8812                // Mixed Some/None: SQLite shouldn't produce this for
8813                // a single FK group, but if it ever does, skip rather
8814                // than mispair.
8815                continue;
8816            };
8817            fks.push(SqlForeignKeySchema {
8818                // SQLite PRAGMA foreign_key_list does not surface a
8819                // CONSTRAINT name; pandas/SQLAlchemy report None there too.
8820                constraint_name: None,
8821                columns,
8822                referenced_table: ref_table,
8823                referenced_columns: resolved_columns,
8824            });
8825        }
8826        Ok(fks)
8827    }
8828}
8829
/// Picks the SQLite column type for an index from its first decisive label.
///
/// Int64 labels and non-NaT timedeltas map to `INTEGER`; Utf8 labels and
/// datetimes other than the `i64::MIN` sentinel map to `TEXT`. Labels that
/// decide nothing are skipped, and `TEXT` is the fallback.
#[cfg(feature = "sql-sqlite")]
fn sql_dtype_from_index(index: &Index) -> &'static str {
    for label in index.labels() {
        let decided = match label {
            IndexLabel::Int64(_) => Some("INTEGER"),
            IndexLabel::Utf8(_) => Some("TEXT"),
            IndexLabel::Timedelta64(v) if *v != Timedelta::NAT => Some("INTEGER"),
            IndexLabel::Datetime64(v) if *v != i64::MIN => Some("TEXT"),
            _ => None,
        };
        if let Some(sql_type) = decided {
            return sql_type;
        }
    }
    "TEXT"
}
8843
8844fn resolve_sql_index_label(
8845    frame: &DataFrame,
8846    options: &SqlWriteOptions,
8847) -> Result<Option<String>, IoError> {
8848    if !options.index {
8849        return Ok(None);
8850    }
8851
8852    let label = options
8853        .index_label
8854        .clone()
8855        .or_else(|| frame.index().name().map(str::to_owned))
8856        .unwrap_or_else(|| "index".to_owned());
8857
8858    if frame.column(&label).is_some() {
8859        return Err(IoError::DuplicateColumnName(label));
8860    }
8861
8862    Ok(Some(label))
8863}
8864
// Per br-frankenpandas-ld8h (fd90.45): the identifier-escaping helpers are
// only reachable from the rusqlite SqlConnection impl, which lives behind
// `feature = "sql-sqlite"`. Gating them the same way keeps
// --no-default-features builds free of dead-code warnings.

/// Escapes `name` for use inside a double-quoted SQL identifier by doubling
/// every embedded `"`.
///
/// # Errors
///
/// Returns `IoError::Sql` when `name` contains a NUL byte.
#[cfg(feature = "sql-sqlite")]
fn escape_sql_ident(name: &str) -> Result<String, IoError> {
    match name.find('\0') {
        Some(_) => Err(IoError::Sql("invalid SQL identifier: NUL byte".to_owned())),
        None => Ok(name.replace('"', "\"\"")),
    }
}
8876
/// Wraps `name` in double quotes after escaping it with `escape_sql_ident`.
///
/// # Errors
///
/// Propagates the error from `escape_sql_ident`.
#[cfg(feature = "sql-sqlite")]
fn quote_sql_ident(name: &str) -> Result<String, IoError> {
    escape_sql_ident(name).map(|escaped| format!("\"{escaped}\""))
}
8881
8882/// Per br-frankenpandas-4l7a (fd90.55): shared identifier-shape
8883/// validator used by `validate_sql_table_name` and
8884/// `validate_sql_column_name`. `kind` is the user-facing label
8885/// inserted into the error message ("table", "column", ...). The
8886/// rule is the same for both: non-empty, ASCII-alphanumeric or
8887/// underscore only — defense in depth alongside `quote_identifier`
8888/// (which handles embedded quotes but doesn't reject other shapes).
8889fn validate_sql_ident(name: &str, kind: &str) -> Result<(), IoError> {
8890    if name.is_empty() || !name.chars().all(|c| c.is_alphanumeric() || c == '_') {
8891        return Err(IoError::Sql(format!(
8892            "invalid {kind} name: '{name}' (must be non-empty, only alphanumeric and underscore allowed)"
8893        )));
8894    }
8895    Ok(())
8896}
8897
8898fn validate_sql_table_name(table_name: &str) -> Result<(), IoError> {
8899    validate_sql_ident(table_name, "table")
8900}
8901
8902/// Per br-frankenpandas-597l (fd90.56): dedicated schema-name
8903/// validator so error messages correctly identify the invalid
8904/// identifier as a schema rather than a table. Same alphanumeric+
8905/// underscore rule as table/column names.
8906fn validate_sql_schema_name(schema: &str) -> Result<(), IoError> {
8907    validate_sql_ident(schema, "schema")
8908}
8909
8910/// Validate `name` against the backend's identifier-length cap.
8911///
8912/// Per br-frankenpandas-9ynk (fd90.27). When `max` is `Some(n)`, errors
8913/// out when `name.len() > n`. When `max` is `None`, accepts any length
8914/// (e.g. SQLite, where the engine has no documented limit). `kind` is
8915/// the user-facing label used in the error message ("table", "column",
8916/// "index label", ...) so misuse points cleanly back to the offending
8917/// identifier without callers having to format the message.
8918fn validate_sql_identifier_length(
8919    name: &str,
8920    max: Option<usize>,
8921    kind: &str,
8922) -> Result<(), IoError> {
8923    if let Some(limit) = max
8924        && name.len() > limit
8925    {
8926        return Err(IoError::Sql(format!(
8927            "invalid {kind} name '{name}': length {len} exceeds backend identifier limit ({limit})",
8928            len = name.len()
8929        )));
8930    }
8931    Ok(())
8932}
8933
8934fn validate_sql_table_ref_identifier_lengths<C: SqlConnection + ?Sized>(
8935    conn: &C,
8936    table_name: &str,
8937    schema: Option<&str>,
8938) -> Result<(), IoError> {
8939    let max = conn.max_identifier_length();
8940    validate_sql_identifier_length(table_name, max, "table")?;
8941    if let Some(s) = schema {
8942        validate_sql_identifier_length(s, max, "schema")?;
8943    }
8944    Ok(())
8945}
8946
8947fn validate_sql_column_identifier_lengths<C, I, S>(conn: &C, names: I) -> Result<(), IoError>
8948where
8949    C: SqlConnection + ?Sized,
8950    I: IntoIterator<Item = S>,
8951    S: AsRef<str>,
8952{
8953    let max = conn.max_identifier_length();
8954    for name in names {
8955        validate_sql_identifier_length(name.as_ref(), max, "column")?;
8956    }
8957    Ok(())
8958}
8959
8960fn sql_select_all_query<C: SqlConnection>(conn: &C, table_name: &str) -> Result<String, IoError> {
8961    sql_select_all_query_in_schema(conn, table_name, None)
8962}
8963
8964/// Build a `SELECT * FROM ...` statement, optionally schema-qualified.
8965///
8966/// Per br-frankenpandas-u6zn (fd90.14). When `schema` is `Some(s)` AND
8967/// `conn.supports_schemas()`, the FROM clause becomes `\"schema\".\"table\"`.
8968/// When `supports_schemas` returns false, any `Some(s)` is rejected before
8969/// query generation so `read_sql_table(schema=...)` matches pandas' fail-closed
8970/// SQLite behavior.
8971fn sql_select_all_query_in_schema<C: SqlConnection>(
8972    conn: &C,
8973    table_name: &str,
8974    schema: Option<&str>,
8975) -> Result<String, IoError> {
8976    validate_sql_table_name(table_name)?;
8977    validate_sql_table_ref_identifier_lengths(conn, table_name, schema)?;
8978    let qualified = match schema {
8979        Some(s) => {
8980            validate_sql_schema_name(s)?;
8981            if !conn.supports_schemas() {
8982                return Err(IoError::Sql(format!(
8983                    "read_sql_table: schema is not supported by {} backend",
8984                    conn.dialect_name()
8985                )));
8986            }
8987            format!(
8988                "{}.{}",
8989                conn.quote_identifier(s)?,
8990                conn.quote_identifier(table_name)?
8991            )
8992        }
8993        _ => conn.quote_identifier(table_name)?,
8994    };
8995    Ok(format!("SELECT * FROM {qualified}"))
8996}
8997
8998fn validate_sql_column_name(column_name: &str) -> Result<(), IoError> {
8999    validate_sql_ident(column_name, "column")
9000}
9001
9002fn sql_select_columns_query<C: SqlConnection>(
9003    conn: &C,
9004    table_name: &str,
9005    columns: &[&str],
9006) -> Result<String, IoError> {
9007    sql_select_columns_query_in_schema(conn, table_name, None, columns)
9008}
9009
9010/// Build a `SELECT col1, col2, ... FROM ...` statement, optionally
9011/// schema-qualified.
9012///
9013/// Per br-frankenpandas-d3e9 (fd90.34). Companion to
9014/// `sql_select_all_query_in_schema`. Same schema rules: when
9015/// `schema` is `Some(s)` AND `conn.supports_schemas()`, the FROM
9016/// clause becomes `\"schema\".\"table\"`; when `supports_schemas()`
9017/// returns false, the request is rejected before query generation.
9018fn sql_select_columns_query_in_schema<C: SqlConnection>(
9019    conn: &C,
9020    table_name: &str,
9021    schema: Option<&str>,
9022    columns: &[&str],
9023) -> Result<String, IoError> {
9024    validate_sql_table_name(table_name)?;
9025    if columns.is_empty() {
9026        return Err(IoError::Sql(
9027            "read_sql_table_columns: columns must be non-empty".to_owned(),
9028        ));
9029    }
9030    for name in columns {
9031        validate_sql_column_name(name)?;
9032    }
9033    validate_sql_table_ref_identifier_lengths(conn, table_name, schema)?;
9034    validate_sql_column_identifier_lengths(conn, columns)?;
9035
9036    let qualified = match schema {
9037        Some(s) => {
9038            validate_sql_schema_name(s)?;
9039            if !conn.supports_schemas() {
9040                return Err(IoError::Sql(format!(
9041                    "read_sql_table: schema is not supported by {} backend",
9042                    conn.dialect_name()
9043                )));
9044            }
9045            format!(
9046                "{}.{}",
9047                conn.quote_identifier(s)?,
9048                conn.quote_identifier(table_name)?
9049            )
9050        }
9051        _ => conn.quote_identifier(table_name)?,
9052    };
9053    let projection: Vec<String> = columns
9054        .iter()
9055        .map(|name| conn.quote_identifier(name))
9056        .collect::<Result<_, _>>()?;
9057    Ok(format!(
9058        "SELECT {} FROM {}",
9059        projection.join(", "),
9060        qualified
9061    ))
9062}
9063
9064fn sql_column_definition<C: SqlConnection>(
9065    conn: &C,
9066    column_name: &str,
9067    sql_type: &str,
9068) -> Result<String, IoError> {
9069    Ok(format!(
9070        "{} {sql_type}",
9071        conn.quote_identifier(column_name)?
9072    ))
9073}
9074
9075// ============================================================================
9076// PostgreSQL SqlConnection Implementation (feature = "sql-postgresql")
9077// ============================================================================
9078
9079#[cfg(any(feature = "sql-postgresql", feature = "sql-mysql"))]
9080use std::cell::RefCell;
9081
/// Owns a `postgres::Client` behind a `RefCell` so the `SqlConnection`
/// trait, whose methods take `&self`, can still drive a client whose
/// operations need `&mut self`.
#[cfg(feature = "sql-postgresql")]
pub struct PostgresConnection {
    // Borrowed mutably for the duration of each trait call.
    client: RefCell<postgres::Client>,
}

#[cfg(feature = "sql-postgresql")]
impl PostgresConnection {
    /// Wraps an already-connected `postgres::Client`.
    pub fn new(client: postgres::Client) -> Self {
        let client = RefCell::new(client);
        Self { client }
    }
}
9097
#[cfg(feature = "sql-postgresql")]
impl SqlConnection for PostgresConnection {
    /// Runs `query_str` with `params` bound positionally and converts every
    /// returned cell with `pg_value_to_scalar`.
    ///
    /// Column names are taken from the first row when rows come back. For
    /// an empty result set they are taken from the prepared statement's
    /// metadata, so a `SELECT` matching zero rows still reports its columns.
    ///
    /// # Errors
    ///
    /// Returns `IoError::Sql` when preparing or executing the statement
    /// fails.
    fn query(&self, query_str: &str, params: &[Scalar]) -> Result<SqlQueryResult, IoError> {
        use postgres::types::ToSql;

        let pg_params: Vec<Box<dyn ToSql + Sync>> =
            params.iter().map(pg_param_from_scalar).collect();
        let param_refs: Vec<&(dyn ToSql + Sync)> = pg_params.iter().map(|b| b.as_ref()).collect();

        let mut client = self.client.borrow_mut();
        // Preparing explicitly keeps the statement's column metadata
        // available even when no rows come back.
        let statement = client
            .prepare(query_str)
            .map_err(|e| IoError::Sql(format!("PostgreSQL query failed: {e}")))?;
        let rows = client
            .query(&statement, &param_refs)
            .map_err(|e| IoError::Sql(format!("PostgreSQL query failed: {e}")))?;

        let columns: Vec<String> = match rows.first() {
            Some(first) => first
                .columns()
                .iter()
                .map(|c| c.name().to_owned())
                .collect(),
            None => statement
                .columns()
                .iter()
                .map(|c| c.name().to_owned())
                .collect(),
        };

        let out_rows: Vec<Vec<Scalar>> = rows
            .iter()
            .map(|row| {
                (0..row.len())
                    .map(|idx| pg_value_to_scalar(row, idx))
                    .collect()
            })
            .collect();

        Ok(SqlQueryResult {
            columns,
            rows: out_rows,
        })
    }

    /// Executes `sql` through the client's `batch_execute`.
    ///
    /// # Errors
    ///
    /// Returns `IoError::Sql` when the batch fails.
    fn execute_batch(&self, sql: &str) -> Result<(), IoError> {
        self.client
            .borrow_mut()
            .batch_execute(sql)
            .map_err(|e| IoError::Sql(format!("PostgreSQL batch execute failed: {e}")))
    }

    /// Reports whether `information_schema.tables` lists `table_name`.
    ///
    /// NOTE(review): the lookup is not restricted to a schema, so a table
    /// of the same name in any visible schema counts as existing — confirm
    /// that is the intended scope.
    ///
    /// # Errors
    ///
    /// Returns `IoError::Sql` when the lookup query fails.
    fn table_exists(&self, table_name: &str) -> Result<bool, IoError> {
        let rows = self
            .client
            .borrow_mut()
            .query(
                "SELECT 1 FROM information_schema.tables WHERE table_name = $1 LIMIT 1",
                &[&table_name],
            )
            .map_err(|e| IoError::Sql(format!("PostgreSQL table_exists failed: {e}")))?;
        Ok(!rows.is_empty())
    }

    /// Executes `insert_sql` once per entry of `rows`, binding that row's
    /// scalars positionally.
    ///
    /// The statement is prepared a single time and reused for every row;
    /// an empty `rows` slice performs no database work at all.
    ///
    /// # Errors
    ///
    /// Returns `IoError::Sql` when preparing the statement or inserting any
    /// row fails. Rows inserted before the failure are not rolled back by
    /// this method.
    fn insert_rows(&self, insert_sql: &str, rows: &[Vec<Scalar>]) -> Result<(), IoError> {
        use postgres::types::ToSql;

        if rows.is_empty() {
            return Ok(());
        }

        let mut client = self.client.borrow_mut();
        let statement = client
            .prepare(insert_sql)
            .map_err(|e| IoError::Sql(format!("PostgreSQL insert failed: {e}")))?;
        for row in rows {
            let pg_params: Vec<Box<dyn ToSql + Sync>> =
                row.iter().map(pg_param_from_scalar).collect();
            let param_refs: Vec<&(dyn ToSql + Sync)> =
                pg_params.iter().map(|b| b.as_ref()).collect();
            client
                .execute(&statement, &param_refs)
                .map_err(|e| IoError::Sql(format!("PostgreSQL insert failed: {e}")))?;
        }
        Ok(())
    }

    /// Maps a frame dtype to the PostgreSQL column type used in DDL; dtypes
    /// without a dedicated mapping fall back to `TEXT`.
    fn dtype_sql(&self, dtype: DType) -> &'static str {
        match dtype {
            DType::Bool | DType::BoolNullable => "BOOLEAN",
            DType::Int64 | DType::Int64Nullable => "BIGINT",
            DType::Float64 => "DOUBLE PRECISION",
            DType::Utf8 => "TEXT",
            DType::Datetime64 => "TIMESTAMP",
            DType::Timedelta64 => "INTERVAL",
            _ => "TEXT",
        }
    }

    /// Chooses the index column type via `pg_sql_dtype_from_index`.
    fn index_dtype_sql(&self, index: &Index) -> &'static str {
        pg_sql_dtype_from_index(index)
    }

    /// Dialect label for this backend.
    fn dialect_name(&self) -> &'static str {
        "postgresql"
    }

    /// PostgreSQL placeholders are numbered: `$1`, `$2`, ...
    fn parameter_marker(&self, ordinal: usize) -> String {
        format!("${ordinal}")
    }

    /// This backend reports `RETURNING` support.
    fn supports_returning(&self) -> bool {
        true
    }

    /// Bind-parameter cap reported for a single statement.
    fn max_param_count(&self) -> Option<usize> {
        Some(65535)
    }

    /// This backend reports schema support.
    fn supports_schemas(&self) -> bool {
        true
    }

    /// Double-quotes `ident`, doubling any embedded `"`.
    ///
    /// # Errors
    ///
    /// Returns `IoError::Sql` when `ident` contains a NUL byte.
    fn quote_identifier(&self, ident: &str) -> Result<String, IoError> {
        if ident.contains('\0') {
            return Err(IoError::Sql("invalid identifier: NUL byte".to_owned()));
        }
        Ok(format!("\"{}\"", ident.replace('"', "\"\"")))
    }

    /// Lists the tables of `schema` (default `"public"`) in name order.
    ///
    /// # Errors
    ///
    /// Returns `IoError::Sql` when the catalog query fails or a returned
    /// name cannot be decoded as a string.
    fn list_tables(&self, schema: Option<&str>) -> Result<Vec<String>, IoError> {
        let schema = schema.unwrap_or("public");
        let rows = self
            .client
            .borrow_mut()
            .query(
                "SELECT table_name FROM information_schema.tables WHERE table_schema = $1 ORDER BY table_name",
                &[&schema],
            )
            .map_err(|e| IoError::Sql(format!("PostgreSQL list_tables failed: {e}")))?;
        // `try_get` surfaces a decode problem as an error; `get` would panic.
        rows.iter()
            .map(|r| r.try_get::<_, String>(0))
            .collect::<Result<Vec<_>, _>>()
            .map_err(|e| IoError::Sql(format!("PostgreSQL list_tables failed: {e}")))
    }

    /// Lists every schema name in `information_schema.schemata`, in name
    /// order.
    ///
    /// # Errors
    ///
    /// Returns `IoError::Sql` when the catalog query fails or a returned
    /// name cannot be decoded as a string.
    fn list_schemas(&self) -> Result<Vec<String>, IoError> {
        let rows = self
            .client
            .borrow_mut()
            .query(
                "SELECT schema_name FROM information_schema.schemata ORDER BY schema_name",
                &[],
            )
            .map_err(|e| IoError::Sql(format!("PostgreSQL list_schemas failed: {e}")))?;
        rows.iter()
            .map(|r| r.try_get::<_, String>(0))
            .collect::<Result<Vec<_>, _>>()
            .map_err(|e| IoError::Sql(format!("PostgreSQL list_schemas failed: {e}")))
    }
}

/// Converts one `Scalar` into a boxed PostgreSQL bind parameter.
///
/// Bool, Int64, Float64, and Utf8 values are bound as themselves; nulls and
/// every other variant are bound as a NULL `Option<i64>`.
///
/// NOTE(review): confirm that a NULL typed as `Option<i64>` serializes for
/// parameters whose server-side type is not BIGINT.
#[cfg(feature = "sql-postgresql")]
fn pg_param_from_scalar(scalar: &Scalar) -> Box<dyn postgres::types::ToSql + Sync> {
    match scalar {
        Scalar::Null(_) => Box::new(Option::<i64>::None),
        Scalar::Bool(b) => Box::new(*b),
        Scalar::Int64(i) => Box::new(*i),
        Scalar::Float64(f) => Box::new(*f),
        Scalar::Utf8(s) => Box::new(s.clone()),
        _ => Box::new(Option::<i64>::None),
    }
}
9265
/// Picks the PostgreSQL column type for an index from its first decisive
/// label.
///
/// Int64 labels map to `BIGINT`, Utf8 to `TEXT`, non-NaT timedeltas to
/// `INTERVAL`, and datetimes other than the `i64::MIN` sentinel to
/// `TIMESTAMP`. Labels that decide nothing are skipped; `TEXT` is the
/// fallback.
#[cfg(feature = "sql-postgresql")]
fn pg_sql_dtype_from_index(index: &Index) -> &'static str {
    for label in index.labels() {
        let decided = match label {
            IndexLabel::Int64(_) => Some("BIGINT"),
            IndexLabel::Utf8(_) => Some("TEXT"),
            IndexLabel::Timedelta64(v) if *v != Timedelta::NAT => Some("INTERVAL"),
            IndexLabel::Datetime64(v) if *v != i64::MIN => Some("TIMESTAMP"),
            _ => None,
        };
        if let Some(sql_type) = decided {
            return sql_type;
        }
    }
    "TEXT"
}
9279
/// Decodes the cell at `idx` of `row` into a `Scalar`.
///
/// The cell is probed as bool, i64, i32, i16, f64, f32, and finally
/// String; the first probe that yields a non-NULL value wins. Integer
/// widths are widened to `Int64` and `f32` to `Float64`. A NULL cell, or a
/// cell none of the probes can decode, becomes `Scalar::Null`.
#[cfg(feature = "sql-postgresql")]
fn pg_value_to_scalar(row: &postgres::Row, idx: usize) -> Scalar {
    if let Ok(Some(v)) = row.try_get::<_, Option<bool>>(idx) {
        return Scalar::Bool(v);
    }
    if let Ok(Some(v)) = row.try_get::<_, Option<i64>>(idx) {
        return Scalar::Int64(v);
    }
    if let Ok(Some(v)) = row.try_get::<_, Option<i32>>(idx) {
        return Scalar::Int64(i64::from(v));
    }
    // SMALLINT decodes only as i16; without this probe such values fell
    // through every branch and were reported as NULL.
    if let Ok(Some(v)) = row.try_get::<_, Option<i16>>(idx) {
        return Scalar::Int64(i64::from(v));
    }
    if let Ok(Some(v)) = row.try_get::<_, Option<f64>>(idx) {
        return Scalar::Float64(v);
    }
    if let Ok(Some(v)) = row.try_get::<_, Option<f32>>(idx) {
        return Scalar::Float64(f64::from(v));
    }
    if let Ok(Some(v)) = row.try_get::<_, Option<String>>(idx) {
        return Scalar::Utf8(v);
    }
    Scalar::Null(crate::NullKind::Null)
}
9302
9303// ============================================================================
9304// MySQL SqlConnection Implementation (feature = "sql-mysql")
9305// ============================================================================
9306
/// Owns a `mysql::Conn` behind a `RefCell` so the `SqlConnection` trait,
/// whose methods take `&self`, can still drive a connection whose
/// operations need `&mut self`.
#[cfg(feature = "sql-mysql")]
pub struct MysqlConnection {
    // Borrowed mutably for the duration of each trait call.
    conn: RefCell<mysql::Conn>,
}

#[cfg(feature = "sql-mysql")]
impl MysqlConnection {
    /// Wraps an already-established `mysql::Conn`.
    pub fn new(conn: mysql::Conn) -> Self {
        let conn = RefCell::new(conn);
        Self { conn }
    }
}
9322
#[cfg(feature = "sql-mysql")]
impl SqlConnection for MysqlConnection {
    /// Runs `query_str` with `params` bound positionally and converts every
    /// returned cell with `mysql_value_to_scalar`.
    ///
    /// Column names are taken from the first row when rows come back. For
    /// an empty result set they are taken from the prepared statement's
    /// metadata, so a `SELECT` matching zero rows still reports its columns.
    ///
    /// # Errors
    ///
    /// Returns `IoError::Sql` when preparing or executing the statement
    /// fails.
    fn query(&self, query_str: &str, params: &[Scalar]) -> Result<SqlQueryResult, IoError> {
        use mysql::prelude::*;

        let mysql_params: Vec<mysql::Value> = params.iter().map(scalar_to_mysql_value).collect();

        let mut conn = self.conn.borrow_mut();
        // Preparing explicitly keeps the statement's column metadata
        // available even when no rows come back.
        let statement = conn
            .prep(query_str)
            .map_err(|e| IoError::Sql(format!("MySQL query failed: {e}")))?;
        let result: Vec<mysql::Row> = conn
            .exec(&statement, mysql_params)
            .map_err(|e| IoError::Sql(format!("MySQL query failed: {e}")))?;

        let columns: Vec<String> = match result.first() {
            Some(first) => first
                .columns_ref()
                .iter()
                .map(|c| c.name_str().to_string())
                .collect(),
            None => statement
                .columns()
                .iter()
                .map(|c| c.name_str().to_string())
                .collect(),
        };

        let out_rows: Vec<Vec<Scalar>> = result
            .iter()
            .map(|row| {
                (0..row.len())
                    .map(|idx| mysql_value_to_scalar(row.get(idx)))
                    .collect()
            })
            .collect();

        Ok(SqlQueryResult {
            columns,
            rows: out_rows,
        })
    }

    /// Executes each `;`-separated statement of `sql` in order, skipping
    /// pieces that are blank after trimming.
    ///
    /// NOTE(review): the split is purely textual, so a `;` inside a string
    /// literal or quoted identifier would also split the statement —
    /// confirm callers only pass generated SQL without embedded semicolons.
    ///
    /// # Errors
    ///
    /// Returns `IoError::Sql` on the first statement that fails; later
    /// statements are not run.
    fn execute_batch(&self, sql: &str) -> Result<(), IoError> {
        use mysql::prelude::*;
        let mut conn = self.conn.borrow_mut();
        for statement in sql.split(';').filter(|s| !s.trim().is_empty()) {
            conn.query_drop(statement)
                .map_err(|e| IoError::Sql(format!("MySQL execute failed: {e}")))?;
        }
        Ok(())
    }

    /// Reports whether `table_name` exists in the connection's current
    /// database.
    ///
    /// The lookup is scoped with `DATABASE()`: `information_schema.tables`
    /// spans every database on the server, so an unscoped match could be
    /// satisfied by an unrelated table elsewhere. This matches the scope of
    /// `SHOW TABLES` used by `list_tables`.
    ///
    /// # Errors
    ///
    /// Returns `IoError::Sql` when the lookup query fails.
    fn table_exists(&self, table_name: &str) -> Result<bool, IoError> {
        use mysql::prelude::*;
        let result: Option<(i32,)> = self
            .conn
            .borrow_mut()
            .exec_first(
                "SELECT 1 FROM information_schema.tables WHERE table_schema = DATABASE() AND table_name = ? LIMIT 1",
                (table_name,),
            )
            .map_err(|e| IoError::Sql(format!("MySQL table_exists failed: {e}")))?;
        Ok(result.is_some())
    }

    /// Executes `insert_sql` once per entry of `rows`, binding that row's
    /// scalars positionally.
    ///
    /// # Errors
    ///
    /// Returns `IoError::Sql` on the first row that fails. Rows inserted
    /// before the failure are not rolled back by this method.
    fn insert_rows(&self, insert_sql: &str, rows: &[Vec<Scalar>]) -> Result<(), IoError> {
        use mysql::prelude::*;
        let mut conn = self.conn.borrow_mut();
        for row in rows {
            let params: Vec<mysql::Value> = row.iter().map(scalar_to_mysql_value).collect();
            conn.exec_drop(insert_sql, params)
                .map_err(|e| IoError::Sql(format!("MySQL insert failed: {e}")))?;
        }
        Ok(())
    }

    /// Maps a frame dtype to the MySQL column type used in DDL; dtypes
    /// without a dedicated mapping fall back to `TEXT`.
    fn dtype_sql(&self, dtype: DType) -> &'static str {
        match dtype {
            DType::Bool | DType::BoolNullable => "TINYINT(1)",
            DType::Int64 | DType::Int64Nullable => "BIGINT",
            DType::Float64 => "DOUBLE",
            DType::Utf8 => "TEXT",
            DType::Datetime64 => "DATETIME",
            DType::Timedelta64 => "TIME",
            _ => "TEXT",
        }
    }

    /// Chooses the index column type via `mysql_sql_dtype_from_index`.
    fn index_dtype_sql(&self, index: &Index) -> &'static str {
        mysql_sql_dtype_from_index(index)
    }

    /// Dialect label for this backend.
    fn dialect_name(&self) -> &'static str {
        "mysql"
    }

    /// MySQL placeholders are positional and unnumbered: always `?`.
    fn parameter_marker(&self, _ordinal: usize) -> String {
        "?".to_owned()
    }

    /// This backend reports no `RETURNING` support.
    fn supports_returning(&self) -> bool {
        false
    }

    /// Bind-parameter cap reported for a single statement.
    fn max_param_count(&self) -> Option<usize> {
        Some(65535)
    }

    /// Identifier-length cap reported for this backend.
    fn max_identifier_length(&self) -> Option<usize> {
        Some(64)
    }

    /// Backtick-quotes `ident`, doubling any embedded backtick.
    ///
    /// # Errors
    ///
    /// Returns `IoError::Sql` when `ident` contains a NUL byte.
    fn quote_identifier(&self, ident: &str) -> Result<String, IoError> {
        if ident.contains('\0') {
            return Err(IoError::Sql("invalid identifier: NUL byte".to_owned()));
        }
        Ok(format!("`{}`", ident.replace('`', "``")))
    }

    /// Lists the tables reported by `SHOW TABLES`; `_schema` is not
    /// consulted.
    ///
    /// # Errors
    ///
    /// Returns `IoError::Sql` when the query fails.
    fn list_tables(&self, _schema: Option<&str>) -> Result<Vec<String>, IoError> {
        use mysql::prelude::*;
        let rows: Vec<(String,)> = self
            .conn
            .borrow_mut()
            .query("SHOW TABLES")
            .map_err(|e| IoError::Sql(format!("MySQL list_tables failed: {e}")))?;
        Ok(rows.into_iter().map(|(name,)| name).collect())
    }
}
9451
/// Picks the MySQL column type for an index from its first decisive label.
///
/// Int64 labels map to `BIGINT`, Utf8 to `VARCHAR(255)`, non-NaT
/// timedeltas to `TIME`, and datetimes other than the `i64::MIN` sentinel
/// to `DATETIME`. Labels that decide nothing are skipped; `VARCHAR(255)`
/// is the fallback.
#[cfg(feature = "sql-mysql")]
fn mysql_sql_dtype_from_index(index: &Index) -> &'static str {
    for label in index.labels() {
        let decided = match label {
            IndexLabel::Int64(_) => Some("BIGINT"),
            IndexLabel::Utf8(_) => Some("VARCHAR(255)"),
            IndexLabel::Timedelta64(v) if *v != Timedelta::NAT => Some("TIME"),
            IndexLabel::Datetime64(v) if *v != i64::MIN => Some("DATETIME"),
            _ => None,
        };
        if let Some(sql_type) = decided {
            return sql_type;
        }
    }
    "VARCHAR(255)"
}
9465
/// Converts one `Scalar` into a MySQL bind value.
///
/// Bool, Int64, Float64, and Utf8 values are bound as themselves; nulls and
/// every other variant are bound as SQL `NULL`.
#[cfg(feature = "sql-mysql")]
fn scalar_to_mysql_value(s: &Scalar) -> mysql::Value {
    use mysql::Value;

    match s {
        Scalar::Bool(flag) => Value::from(*flag),
        Scalar::Int64(number) => Value::from(*number),
        Scalar::Float64(number) => Value::from(*number),
        Scalar::Utf8(text) => Value::from(text.as_str()),
        Scalar::Null(_) => Value::NULL,
        _ => Value::NULL,
    }
}
9477
/// Converts an optional MySQL cell value into a `Scalar`.
///
/// A missing cell or SQL `NULL` becomes `Scalar::Null`; byte strings are
/// decoded as UTF-8 with lossy replacement; signed integers, floats, and
/// doubles map to `Int64` / `Float64`. Every other value kind becomes
/// `Scalar::Null`.
///
/// Unsigned integers map to `Int64` when they fit. A value above
/// `i64::MAX` is returned as `Float64` (approximate) instead of being
/// reinterpreted as a negative number by a wrapping cast.
#[cfg(feature = "sql-mysql")]
fn mysql_value_to_scalar(v: Option<mysql::Value>) -> Scalar {
    match v {
        None | Some(mysql::Value::NULL) => Scalar::Null(crate::NullKind::Null),
        Some(mysql::Value::Bytes(b)) => Scalar::Utf8(String::from_utf8_lossy(&b).into_owned()),
        Some(mysql::Value::Int(i)) => Scalar::Int64(i),
        Some(mysql::Value::UInt(u)) => match i64::try_from(u) {
            Ok(fits) => Scalar::Int64(fits),
            // Out of i64 range: keep the magnitude rather than wrapping.
            Err(_) => Scalar::Float64(u as f64),
        },
        Some(mysql::Value::Float(f)) => Scalar::Float64(f64::from(f)),
        Some(mysql::Value::Double(d)) => Scalar::Float64(d),
        _ => Scalar::Null(crate::NullKind::Null),
    }
}
9490
/// Test-only shorthand: builds the `CREATE TABLE` statement with no schema
/// qualifier by delegating to `sql_create_table_query_in_schema`.
#[cfg(test)]
fn sql_create_table_query<C: SqlConnection>(
    conn: &C,
    table_name: &str,
    column_defs: &[String],
) -> Result<String, IoError> {
    let no_schema: Option<&str> = None;
    sql_create_table_query_in_schema(conn, table_name, no_schema, column_defs)
}
9499
9500/// Build a `CREATE TABLE IF NOT EXISTS ...` statement, optionally
9501/// schema-qualified.
9502///
9503/// Per br-frankenpandas-udn6 (fd90.15). When `schema` is `Some(s)` AND
9504/// `conn.supports_schemas()`, the target becomes `"schema"."table"`. On
9505/// backends that report false, any `Some(s)` is silently ignored.
9506fn sql_create_table_query_in_schema<C: SqlConnection>(
9507    conn: &C,
9508    table_name: &str,
9509    schema: Option<&str>,
9510    column_defs: &[String],
9511) -> Result<String, IoError> {
9512    validate_sql_table_name(table_name)?;
9513    validate_sql_table_ref_identifier_lengths(conn, table_name, schema)?;
9514    let qualified = match schema {
9515        Some(s) if conn.supports_schemas() => {
9516            validate_sql_schema_name(s)?;
9517            format!(
9518                "{}.{}",
9519                conn.quote_identifier(s)?,
9520                conn.quote_identifier(table_name)?
9521            )
9522        }
9523        _ => conn.quote_identifier(table_name)?,
9524    };
9525    Ok(format!(
9526        "CREATE TABLE IF NOT EXISTS {qualified} ({})",
9527        column_defs.join(", ")
9528    ))
9529}
9530
/// Test-only shorthand: builds the single-row `INSERT` statement with no
/// schema qualifier by delegating to `sql_insert_rows_query_in_schema`.
#[cfg(test)]
fn sql_insert_rows_query<C: SqlConnection>(
    conn: &C,
    table_name: &str,
    column_names: &[String],
) -> Result<String, IoError> {
    let no_schema: Option<&str> = None;
    sql_insert_rows_query_in_schema(conn, table_name, no_schema, column_names)
}
9539
9540/// Build an `INSERT INTO ... VALUES (...)` statement, optionally
9541/// schema-qualified.
9542///
9543/// Per br-frankenpandas-udn6 (fd90.15). Same schema rules as the CREATE
9544/// TABLE counterpart.
9545fn sql_insert_rows_query_in_schema<C: SqlConnection>(
9546    conn: &C,
9547    table_name: &str,
9548    schema: Option<&str>,
9549    column_names: &[String],
9550) -> Result<String, IoError> {
9551    validate_sql_table_name(table_name)?;
9552    validate_sql_table_ref_identifier_lengths(conn, table_name, schema)?;
9553    validate_sql_column_identifier_lengths(conn, column_names.iter())?;
9554    let qualified = match schema {
9555        Some(s) if conn.supports_schemas() => {
9556            validate_sql_schema_name(s)?;
9557            format!(
9558                "{}.{}",
9559                conn.quote_identifier(s)?,
9560                conn.quote_identifier(table_name)?
9561            )
9562        }
9563        _ => conn.quote_identifier(table_name)?,
9564    };
9565    let quoted_columns = column_names
9566        .iter()
9567        .map(|name| conn.quote_identifier(name))
9568        .collect::<Result<Vec<_>, _>>()?
9569        .join(", ");
9570    let placeholders = (1..=column_names.len())
9571        .map(|ordinal| conn.parameter_marker(ordinal))
9572        .collect::<Vec<_>>()
9573        .join(", ");
9574    Ok(format!(
9575        "INSERT INTO {qualified} ({quoted_columns}) VALUES ({placeholders})"
9576    ))
9577}
9578
9579/// Build a multi-row `INSERT INTO ... VALUES (...), (...), ...`
9580/// statement, optionally schema-qualified.
9581///
9582/// Placeholder ordinals span 1..=`num_rows` * `column_names.len()` so
9583/// PostgreSQL-style `$N` markers stay unique across the whole statement.
9584/// SQLite's `?N` and the bare `?` default also work because positional
9585/// binding consumes ordinals in left-to-right order.
9586///
9587/// Per br-frankenpandas-i0ml (fd90.19).
9588fn sql_multi_row_insert_query_in_schema<C: SqlConnection>(
9589    conn: &C,
9590    table_name: &str,
9591    schema: Option<&str>,
9592    column_names: &[String],
9593    num_rows: usize,
9594) -> Result<String, IoError> {
9595    validate_sql_table_name(table_name)?;
9596    if num_rows == 0 || column_names.is_empty() {
9597        return Err(IoError::Sql(
9598            "multi-row insert requires at least one row and one column".to_owned(),
9599        ));
9600    }
9601    validate_sql_table_ref_identifier_lengths(conn, table_name, schema)?;
9602    validate_sql_column_identifier_lengths(conn, column_names.iter())?;
9603    let qualified = match schema {
9604        Some(s) if conn.supports_schemas() => {
9605            validate_sql_schema_name(s)?;
9606            format!(
9607                "{}.{}",
9608                conn.quote_identifier(s)?,
9609                conn.quote_identifier(table_name)?
9610            )
9611        }
9612        _ => conn.quote_identifier(table_name)?,
9613    };
9614    let quoted_columns = column_names
9615        .iter()
9616        .map(|name| conn.quote_identifier(name))
9617        .collect::<Result<Vec<_>, _>>()?
9618        .join(", ");
9619    let cols = column_names.len();
9620    let mut tuples = Vec::with_capacity(num_rows);
9621    let mut next_ord = 1usize;
9622    for _ in 0..num_rows {
9623        let row_placeholders = (0..cols)
9624            .map(|_| {
9625                let marker = conn.parameter_marker(next_ord);
9626                next_ord += 1;
9627                marker
9628            })
9629            .collect::<Vec<_>>()
9630            .join(", ");
9631        tuples.push(format!("({row_placeholders})"));
9632    }
9633    Ok(format!(
9634        "INSERT INTO {qualified} ({quoted_columns}) VALUES {}",
9635        tuples.join(", ")
9636    ))
9637}
9638
9639/// Build a `DROP TABLE IF EXISTS ...` statement, optionally
9640/// schema-qualified.
9641///
9642/// Per br-frankenpandas-hxob (fd90.16). Companion to
9643/// `sql_create_table_query_in_schema`. Same schema rules: when
9644/// `schema` is `Some(s)` AND `conn.supports_schemas()`, the target
9645/// becomes `\"schema\".\"table\"`; otherwise the bare table name is
9646/// used. Routes through `conn.quote_identifier` so backend dialect
9647/// overrides (MySQL backticks etc.) take effect on the drop path.
9648fn sql_drop_table_query_in_schema<C: SqlConnection>(
9649    conn: &C,
9650    table_name: &str,
9651    schema: Option<&str>,
9652) -> Result<String, IoError> {
9653    validate_sql_table_name(table_name)?;
9654    validate_sql_table_ref_identifier_lengths(conn, table_name, schema)?;
9655    let qualified = match schema {
9656        Some(s) if conn.supports_schemas() => {
9657            validate_sql_schema_name(s)?;
9658            format!(
9659                "{}.{}",
9660                conn.quote_identifier(s)?,
9661                conn.quote_identifier(table_name)?
9662            )
9663        }
9664        _ => conn.quote_identifier(table_name)?,
9665    };
9666    Ok(format!("DROP TABLE IF EXISTS {qualified}"))
9667}
9668
9669/// Read the result of a SQL query into a DataFrame.
9670///
9671/// Matches `pd.read_sql(sql, con)`.
9672pub fn read_sql<C: SqlConnection>(conn: &C, query: &str) -> Result<DataFrame, IoError> {
9673    read_sql_with_options(conn, query, &SqlReadOptions::default())
9674}
9675
9676/// Read the result of a SQL query into a DataFrame with read-time options.
9677///
9678/// Matches the supported subset of `pd.read_sql(sql, con, params=[...], parse_dates=...)`.
9679pub fn read_sql_with_options<C: SqlConnection>(
9680    conn: &C,
9681    query: &str,
9682    options: &SqlReadOptions,
9683) -> Result<DataFrame, IoError> {
9684    // Per br-frankenpandas-t1777: query readers take a raw SELECT written
9685    // by the caller, so options.columns has no effect. Silently ignoring
9686    // diverged from the table-reader sibling. Reject to surface the
9687    // mismatch — callers should embed the column list in the SELECT, or
9688    // use read_sql_table_with_options to generate the projection.
9689    if options.columns.is_some() {
9690        return Err(IoError::Sql(
9691            "options.columns is meaningful only for table readers; embed the column list in \
9692             the SELECT or use read_sql_table_with_options to generate the projection from a \
9693             table name"
9694                .to_owned(),
9695        ));
9696    }
9697    let (headers, columns, dtype_hints) = sql_query_to_columns(conn, query, options)?;
9698    let frame = dataframe_from_sql_columns(headers, columns, dtype_hints)?;
9699    apply_sql_index_col(frame, options.index_col.as_deref())
9700}
9701
9702/// Per br-frankenpandas-c1h9 (fd90.36): promote `options.index_col`
9703/// to the DataFrame index when set, with empty-string rejection.
9704fn apply_sql_index_col(frame: DataFrame, index_col: Option<&str>) -> Result<DataFrame, IoError> {
9705    let Some(name) = index_col else {
9706        return Ok(frame);
9707    };
9708    if name.is_empty() {
9709        return Err(IoError::Sql(
9710            "index_col: empty string is not a valid column name".to_owned(),
9711        ));
9712    }
9713    promote_column_to_index(&frame, name)
9714}
9715
/// Read the result of a SQL query into a DataFrame.
///
/// Matches `pd.read_sql_query(sql, con)`. This is the query-only spelling of
/// `read_sql`; table-name dispatch stays on `read_sql_table`.
///
/// Forwards `conn` and `query` unchanged to `read_sql`, so the returned
/// frame and any `IoError` are exactly what that function produces.
pub fn read_sql_query<C: SqlConnection>(conn: &C, query: &str) -> Result<DataFrame, IoError> {
    read_sql(conn, query)
}
9723
/// Read the result of a SQL query into a DataFrame with read-time options.
///
/// Matches the supported subset of
/// `pd.read_sql_query(sql, con, params=[...], parse_dates=..., coerce_float=...)`.
///
/// Forwards all three arguments unchanged to `read_sql_with_options`, so the
/// returned frame and any `IoError` are exactly what that function produces.
pub fn read_sql_query_with_options<C: SqlConnection>(
    conn: &C,
    query: &str,
    options: &SqlReadOptions,
) -> Result<DataFrame, IoError> {
    read_sql_with_options(conn, query, options)
}
9735
9736/// Read a SQL query result with read-time options and optional index promotion.
9737///
9738/// Matches the supported subset of
9739/// `pd.read_sql_query(sql, con, params=[...], parse_dates=..., coerce_float=..., index_col=...)`.
9740pub fn read_sql_query_with_options_and_index_col<C: SqlConnection>(
9741    conn: &C,
9742    query: &str,
9743    options: &SqlReadOptions,
9744    index_col: Option<&str>,
9745) -> Result<DataFrame, IoError> {
9746    if let Some(col_name) = index_col {
9747        let cleared = SqlReadOptions {
9748            index_col: None,
9749            ..options.clone()
9750        };
9751        let frame = read_sql_query_with_options(conn, query, &cleared)?;
9752        return apply_sql_index_col(frame, Some(col_name));
9753    }
9754    read_sql_query_with_options(conn, query, options)
9755}
9756
/// Read the result of a SQL query as an iterator of DataFrame chunks.
///
/// Matches the supported subset of `pd.read_sql_query(sql, con, chunksize=...)`.
///
/// Forwards all arguments unchanged to `read_sql_chunks`, so the returned
/// iterator and any `IoError` are exactly what that function produces.
pub fn read_sql_query_chunks<'conn, C: SqlConnection + 'conn>(
    conn: &'conn C,
    query: &str,
    chunk_size: usize,
) -> Result<SqlChunkIterator<'conn>, IoError> {
    read_sql_chunks(conn, query, chunk_size)
}
9767
/// Read a SQL query result as chunks with one column promoted to each chunk's index.
///
/// Matches the supported subset of
/// `pd.read_sql_query(sql, con, index_col=..., chunksize=...)`.
///
/// Forwards all arguments unchanged to `read_sql_chunks_with_index_col`, so
/// the returned iterator and any `IoError` are exactly what that function
/// produces.
pub fn read_sql_query_chunks_with_index_col<'conn, C: SqlConnection + 'conn>(
    conn: &'conn C,
    query: &str,
    index_col: Option<&str>,
    chunk_size: usize,
) -> Result<SqlIndexedChunkIterator<'conn>, IoError> {
    read_sql_chunks_with_index_col(conn, query, index_col, chunk_size)
}
9780
9781/// Read the result of a SQL query as chunks with read-time options.
9782///
9783/// Matches the supported subset of
9784/// `pd.read_sql_query(sql, con, params=[...], parse_dates=..., coerce_float=..., chunksize=...)`.
9785pub fn read_sql_query_chunks_with_options<'conn, C: SqlConnection + 'conn>(
9786    conn: &'conn C,
9787    query: &str,
9788    options: &SqlReadOptions,
9789    chunk_size: usize,
9790) -> Result<SqlChunkIterator<'conn>, IoError> {
9791    if options.index_col.is_some() {
9792        return Err(IoError::Sql(
9793            "options.index_col is set but this entrypoint returns SqlChunkIterator without \
9794             index promotion; use read_sql_query_chunks_with_options_and_index_col to honor \
9795             index_col"
9796                .to_owned(),
9797        ));
9798    }
9799    read_sql_chunks_with_options(conn, query, options, chunk_size)
9800}
9801
/// Read a SQL query result as chunks with read-time options and index promotion.
///
/// Matches the supported subset of
/// `pd.read_sql_query(sql, con, params=[...], parse_dates=..., coerce_float=..., index_col=..., chunksize=...)`.
///
/// Forwards all arguments unchanged to
/// `read_sql_chunks_with_options_and_index_col`, so the returned iterator
/// and any `IoError` are exactly what that function produces.
pub fn read_sql_query_chunks_with_options_and_index_col<'conn, C: SqlConnection + 'conn>(
    conn: &'conn C,
    query: &str,
    options: &SqlReadOptions,
    index_col: Option<&str>,
    chunk_size: usize,
) -> Result<SqlIndexedChunkIterator<'conn>, IoError> {
    read_sql_chunks_with_options_and_index_col(conn, query, options, index_col, chunk_size)
}
9815
/// Read a SQL query result with one column promoted to the index.
///
/// Matches `pd.read_sql_query(sql, con, index_col=...)`.
///
/// Forwards all arguments unchanged to `read_sql_with_index_col`, so the
/// returned frame and any `IoError` are exactly what that function produces.
pub fn read_sql_query_with_index_col<C: SqlConnection>(
    conn: &C,
    query: &str,
    index_col: Option<&str>,
) -> Result<DataFrame, IoError> {
    read_sql_with_index_col(conn, query, index_col)
}
9826
9827fn sql_trim_chunk_source(query: &str) -> Result<&str, IoError> {
9828    let trimmed = query.trim().trim_end_matches(';').trim();
9829    if trimmed.is_empty() {
9830        Err(IoError::Sql("read_sql query must be non-empty".to_owned()))
9831    } else {
9832        Ok(trimmed)
9833    }
9834}
9835
9836fn sql_paged_query<C: SqlConnection + ?Sized>(
9837    conn: &C,
9838    query: &str,
9839    base_param_count: usize,
9840) -> Result<String, IoError> {
9841    let source = sql_trim_chunk_source(query)?;
9842    let limit_marker = conn.parameter_marker(base_param_count + 1);
9843    let offset_marker = conn.parameter_marker(base_param_count + 2);
9844    Ok(format!(
9845        "SELECT * FROM ({source}) AS frankenpandas_sql_chunk_source \
9846         LIMIT {limit_marker} OFFSET {offset_marker}"
9847    ))
9848}
9849
9850fn sql_paged_options(
9851    options: &SqlReadOptions,
9852    limit: usize,
9853    offset: usize,
9854) -> Result<SqlReadOptions, IoError> {
9855    let limit = i64::try_from(limit)
9856        .map_err(|_| IoError::Sql("read_sql chunksize exceeds i64 range".to_owned()))?;
9857    let offset = i64::try_from(offset)
9858        .map_err(|_| IoError::Sql("read_sql chunk offset exceeds i64 range".to_owned()))?;
9859    let mut params = options.params.clone().unwrap_or_default();
9860    params.push(Scalar::Int64(limit));
9861    params.push(Scalar::Int64(offset));
9862    Ok(SqlReadOptions {
9863        params: Some(params),
9864        ..options.clone()
9865    })
9866}
9867
9868fn sql_paged_query_headers<C: SqlConnection + ?Sized>(
9869    conn: &C,
9870    query: &str,
9871    options: &SqlReadOptions,
9872) -> Result<Vec<String>, IoError> {
9873    let base_param_count = options.params.as_ref().map_or(0, Vec::len);
9874    let paged_query = sql_paged_query(conn, query, base_param_count)?;
9875    let paged_options = sql_paged_options(options, 0, 0)?;
9876    let result = conn.query(&paged_query, paged_options.params.as_deref().unwrap_or(&[]))?;
9877    reject_duplicate_headers(&result.columns)?;
9878    Ok(result.columns)
9879}
9880
9881fn sql_query_to_columns_paged<C: SqlConnection + ?Sized>(
9882    conn: &C,
9883    query: &str,
9884    options: &SqlReadOptions,
9885    chunk_size: usize,
9886    offset: usize,
9887) -> Result<SqlMaterializedColumns, IoError> {
9888    let base_param_count = options.params.as_ref().map_or(0, Vec::len);
9889    let paged_query = sql_paged_query(conn, query, base_param_count)?;
9890    let paged_options = sql_paged_options(options, chunk_size, offset)?;
9891    sql_query_to_columns(conn, &paged_query, &paged_options)
9892}
9893
9894fn sql_query_to_columns<C: SqlConnection + ?Sized>(
9895    conn: &C,
9896    query: &str,
9897    options: &SqlReadOptions,
9898) -> Result<SqlMaterializedColumns, IoError> {
9899    let params = options.params.as_deref().unwrap_or(&[]);
9900    let SqlQueryResult {
9901        columns: headers,
9902        rows,
9903    } = conn.query(query, params)?;
9904    reject_duplicate_headers(&headers)?;
9905    let mut dtype_hints = conn.query_column_dtypes(query, params)?;
9906    dtype_hints.resize(headers.len(), None);
9907    let mut columns: Vec<Vec<Scalar>> = (0..headers.len()).map(|_| Vec::new()).collect();
9908
9909    for row in rows {
9910        for (col_idx, value) in row.into_iter().enumerate() {
9911            if let Some(col_vec) = columns.get_mut(col_idx) {
9912                col_vec.push(value);
9913            }
9914        }
9915    }
9916
9917    if let Some(ref parse_dates) = options.parse_dates {
9918        apply_parse_dates(&headers, &mut columns, parse_dates)?;
9919    }
9920    if options.coerce_float {
9921        apply_sql_coerce_float(&mut columns);
9922    }
9923    if let Some(ref dtype_map) = options.dtype {
9924        apply_sql_dtype_overrides(
9925            &headers,
9926            &mut columns,
9927            dtype_map,
9928            options.parse_dates.as_deref().unwrap_or(&[]),
9929        )?;
9930        for (idx, header) in headers.iter().enumerate() {
9931            if let Some(dtype) = dtype_map.get(header)
9932                && !options
9933                    .parse_dates
9934                    .as_deref()
9935                    .unwrap_or(&[])
9936                    .iter()
9937                    .any(|d| d == header)
9938            {
9939                dtype_hints[idx] = Some(*dtype);
9940            }
9941        }
9942    }
9943
9944    Ok((headers, columns, dtype_hints))
9945}
9946
9947/// Apply pandas-style `dtype={'col': dtype}` overrides to materialized
9948/// SQL result columns. Skips columns also listed in `parse_dates` to
9949/// avoid double-cast errors. Per br-frankenpandas-l9pt (fd90.11).
9950fn apply_sql_dtype_overrides(
9951    headers: &[String],
9952    columns: &mut [Vec<Scalar>],
9953    dtype_map: &BTreeMap<String, DType>,
9954    parse_dates: &[String],
9955) -> Result<(), IoError> {
9956    for (idx, header) in headers.iter().enumerate() {
9957        let Some(target_dtype) = dtype_map.get(header) else {
9958            continue;
9959        };
9960        if parse_dates.iter().any(|d| d == header) {
9961            // parse_dates wins; skip dtype override for this column.
9962            continue;
9963        }
9964        let Some(col) = columns.get_mut(idx) else {
9965            continue;
9966        };
9967        for value in col.iter_mut() {
9968            // Take ownership of the scalar, cast, and write back. NaT/Null
9969            // pass through cast_scalar_owned unchanged so missingness is
9970            // preserved across the override.
9971            let taken = std::mem::replace(value, Scalar::Null(NullKind::Null));
9972            *value = cast_scalar_owned(taken, *target_dtype).map_err(|e| {
9973                IoError::Sql(format!(
9974                    "dtype override on column '{header}' to {target_dtype:?} failed: {e}"
9975                ))
9976            })?;
9977        }
9978    }
9979    Ok(())
9980}
9981
9982fn dataframe_from_sql_columns(
9983    headers: Vec<String>,
9984    columns: Vec<Vec<Scalar>>,
9985    dtype_hints: SqlColumnDtypeHints,
9986) -> Result<DataFrame, IoError> {
9987    let row_count = columns.first().map_or(0, Vec::len);
9988    let mut out_columns = BTreeMap::new();
9989    let mut column_order = Vec::new();
9990
9991    for (idx, (name, values)) in headers.into_iter().zip(columns).enumerate() {
9992        let dtype_hint = dtype_hints.get(idx).copied().flatten();
9993        let has_observed_value = values.iter().any(|value| !matches!(value, Scalar::Null(_)));
9994        let column = match (has_observed_value, dtype_hint) {
9995            (false, Some(dtype)) => Column::new(dtype, values)?,
9996            _ => Column::from_values(values)?,
9997        };
9998        out_columns.insert(name.clone(), column);
9999        column_order.push(name);
10000    }
10001
10002    let index = Index::from_i64((0..row_count as i64).collect());
10003    Ok(DataFrame::new_with_column_order(
10004        index,
10005        out_columns,
10006        column_order,
10007    )?)
10008}
10009
10010/// Read a SQL query result as an iterator of DataFrame chunks.
10011///
10012/// Matches the supported subset of `pd.read_sql(sql, con, chunksize=...)`.
10013/// Each chunk receives a fresh zero-based RangeIndex, matching pandas'
10014/// SQLite chunk iterator behavior.
10015pub fn read_sql_chunks<'conn, C: SqlConnection + 'conn>(
10016    conn: &'conn C,
10017    query: &str,
10018    chunk_size: usize,
10019) -> Result<SqlChunkIterator<'conn>, IoError> {
10020    read_sql_chunks_with_options(conn, query, &SqlReadOptions::default(), chunk_size)
10021}
10022
10023/// Read a SQL query result as DataFrame chunks with read-time options.
10024///
10025/// Backends that opt into `supports_paged_sql_chunks` are queried one bounded
10026/// page at a time through a `LIMIT`/`OFFSET` wrapper so the iterator does not
10027/// hold the full result set in memory. Other backends keep the legacy
10028/// materialized fallback until they provide a native chunk strategy. `params`,
10029/// `parse_dates`, `coerce_float`, and `dtype` are applied to each yielded page.
10030pub fn read_sql_chunks_with_options<'conn, C: SqlConnection + 'conn>(
10031    conn: &'conn C,
10032    query: &str,
10033    options: &SqlReadOptions,
10034    chunk_size: usize,
10035) -> Result<SqlChunkIterator<'conn>, IoError> {
10036    if chunk_size == 0 {
10037        return Err(IoError::Sql(
10038            "read_sql chunksize must be greater than zero".to_owned(),
10039        ));
10040    }
10041    // Per br-frankenpandas-i8kja: this entrypoint returns SqlChunkIterator
10042    // with no index promotion. Honoring options.index_col would silently
10043    // diverge from the full-frame read_sql_with_options sibling (which
10044    // does promote). Reject to surface the mismatch — callers should use
10045    // read_sql_chunks_with_options_and_index_col when index_col is set.
10046    if options.index_col.is_some() {
10047        return Err(IoError::Sql(
10048            "options.index_col is set but this entrypoint returns SqlChunkIterator without \
10049             index promotion; use read_sql_chunks_with_options_and_index_col to honor index_col"
10050                .to_owned(),
10051        ));
10052    }
10053    // Per br-frankenpandas-t1777: query readers take a raw SELECT written
10054    // by the caller, so options.columns has no effect (the projection is
10055    // already in the query string). Silently ignoring would diverge from
10056    // the table-reader sibling (which honors columns to build the SELECT).
10057    // Reject to surface the mismatch — callers should embed the column
10058    // list in the SELECT, or use read_sql_table_chunks_with_options when
10059    // generating the SELECT from a table name.
10060    if options.columns.is_some() {
10061        return Err(IoError::Sql(
10062            "options.columns is meaningful only for table readers; embed the column list in \
10063             the SELECT or use read_sql_table_chunks_with_options to generate the projection \
10064             from a table name"
10065                .to_owned(),
10066        ));
10067    }
10068
10069    if conn.supports_paged_sql_chunks() {
10070        return SqlChunkIterator::paged(conn, query, options, chunk_size);
10071    }
10072
10073    let (headers, columns, dtype_hints) = sql_query_to_columns(conn, query, options)?;
10074    Ok(SqlChunkIterator::materialized(
10075        headers,
10076        columns,
10077        dtype_hints,
10078        chunk_size,
10079    ))
10080}
10081
10082/// Read a SQL query result as DataFrame chunks with read-time options and optional index promotion.
10083///
10084/// `params`, `parse_dates`, `coerce_float`, and `dtype` are applied to each
10085/// yielded page before optional index promotion.
10086pub fn read_sql_chunks_with_options_and_index_col<'conn, C: SqlConnection + 'conn>(
10087    conn: &'conn C,
10088    query: &str,
10089    options: &SqlReadOptions,
10090    index_col: Option<&str>,
10091    chunk_size: usize,
10092) -> Result<SqlIndexedChunkIterator<'conn>, IoError> {
10093    // Per br-frankenpandas-t1777: query readers can't apply options.columns
10094    // (caller writes the SELECT). Reject for consistency with the plain
10095    // chunk reader's rejection — the indexed sibling shouldn't be a
10096    // backdoor.
10097    if options.columns.is_some() {
10098        return Err(IoError::Sql(
10099            "options.columns is meaningful only for table readers; embed the column list in \
10100             the SELECT or use read_sql_table_chunks_with_options_and_index_col to generate \
10101             the projection from a table name"
10102                .to_owned(),
10103        ));
10104    }
10105    // The plain chunk reader rejects options.index_col (see i8kja); clear
10106    // it before delegating so the indexed sibling is the canonical
10107    // honor-index_col entrypoint regardless of which slot the caller used.
10108    let cleared = SqlReadOptions {
10109        index_col: None,
10110        ..options.clone()
10111    };
10112    let inner = read_sql_chunks_with_options(conn, query, &cleared, chunk_size)?;
10113    sql_indexed_chunks(inner, index_col.or(options.index_col.as_deref()))
10114}
10115
10116/// Read a SQL query result as DataFrame chunks with optional index promotion.
10117///
10118/// Matches the supported subset of `pd.read_sql(sql, con, index_col=..., chunksize=...)`.
10119pub fn read_sql_chunks_with_index_col<'conn, C: SqlConnection + 'conn>(
10120    conn: &'conn C,
10121    query: &str,
10122    index_col: Option<&str>,
10123    chunk_size: usize,
10124) -> Result<SqlIndexedChunkIterator<'conn>, IoError> {
10125    let inner = read_sql_chunks(conn, query, chunk_size)?;
10126    sql_indexed_chunks(inner, index_col)
10127}
10128
10129/// Read a SQL query result with one column promoted to the index.
10130///
10131/// Matches `pd.read_sql(sql, con, index_col=...)`. When
10132/// `index_col=Some(name)` the named column is removed from the data
10133/// columns and its values become the DataFrame's row index. Returns
10134/// `IoError::Sql` if the named column is absent from the result set.
10135pub fn read_sql_with_index_col<C: SqlConnection>(
10136    conn: &C,
10137    query: &str,
10138    index_col: Option<&str>,
10139) -> Result<DataFrame, IoError> {
10140    let frame = read_sql(conn, query)?;
10141    apply_sql_index_col(frame, index_col)
10142}
10143
10144/// Read an entire SQL table with one column promoted to the index.
10145///
10146/// Matches `pd.read_sql_table(table, con, index_col=...)`.
10147pub fn read_sql_table_with_index_col<C: SqlConnection>(
10148    conn: &C,
10149    table_name: &str,
10150    index_col: Option<&str>,
10151) -> Result<DataFrame, IoError> {
10152    let frame = read_sql_table(conn, table_name)?;
10153    apply_sql_index_col(frame, index_col)
10154}
10155
10156fn promote_column_to_index(frame: &DataFrame, col_name: &str) -> Result<DataFrame, IoError> {
10157    let column = frame.column(col_name).ok_or_else(|| {
10158        IoError::Sql(format!(
10159            "index_col {col_name:?} not present in result columns"
10160        ))
10161    })?;
10162    let labels: Vec<IndexLabel> = column
10163        .values()
10164        .iter()
10165        .map(|v| match v {
10166            Scalar::Int64(i) => IndexLabel::Int64(*i),
10167            Scalar::Utf8(s) => IndexLabel::Utf8(s.clone()),
10168            Scalar::Float64(f) if !f.is_nan() => IndexLabel::Utf8(f.to_string()),
10169            Scalar::Bool(b) => IndexLabel::Utf8(if *b { "True" } else { "False" }.to_string()),
10170            Scalar::Timedelta64(ns) => IndexLabel::Timedelta64(*ns),
10171            _ => IndexLabel::Utf8("NaN".to_owned()),
10172        })
10173        .collect();
10174    let new_index = Index::new(labels).set_name(col_name);
10175
10176    let mut new_columns = std::collections::BTreeMap::new();
10177    let mut new_order = Vec::new();
10178    for name in frame.column_names() {
10179        if name == col_name {
10180            continue;
10181        }
10182        if let Some(col) = frame.column(name) {
10183            new_columns.insert(name.clone(), col.clone());
10184            new_order.push(name.clone());
10185        }
10186    }
10187
10188    Ok(DataFrame::new_with_column_order(
10189        new_index,
10190        new_columns,
10191        new_order,
10192    )?)
10193}
10194
10195/// Read an entire SQL table into a DataFrame.
10196///
10197/// Matches `pd.read_sql_table(table_name, con)`.
10198pub fn read_sql_table<C: SqlConnection>(conn: &C, table_name: &str) -> Result<DataFrame, IoError> {
10199    read_sql(conn, &sql_select_all_query(conn, table_name)?)
10200}
10201
10202/// List user-visible table names known to the SQL backend.
10203///
10204/// Matches the supported subset of
10205/// `pd.io.sql.SQLDatabase.list_tables(schema=...)`. When the backend
10206/// reports `supports_schemas() == false` (SQLite), `schema` is ignored
10207/// and all tables in the single namespace are returned. When the
10208/// backend supports schemas (PostgreSQL, MySQL, MSSQL), `Some(s)`
10209/// scopes the listing. `None` passes through to the backend
10210/// unchanged — backends MAY consult `default_schema()` for their own
10211/// fallback logic if desired (per fd90.57: this wrapper does NOT
10212/// apply the fallback automatically).
10213///
10214/// Per br-frankenpandas-vhq2 (fd90.20).
10215pub fn list_sql_tables<C: SqlConnection>(
10216    conn: &C,
10217    schema: Option<&str>,
10218) -> Result<Vec<String>, IoError> {
10219    conn.list_tables(schema)
10220}
10221
10222/// Introspect a SQL table's column metadata, optionally schema-scoped.
10223///
10224/// Matches the supported subset of
10225/// `pd.io.sql.SQLDatabase.has_table` + `SQLAlchemy.MetaData.reflect`
10226/// for column-level details. Returns `Ok(None)` when the table does
10227/// not exist. Schema arg is silently ignored when the backend reports
10228/// `supports_schemas() == false` (SQLite).
10229///
10230/// Per br-frankenpandas-w43q (fd90.21).
10231pub fn sql_table_schema<C: SqlConnection>(
10232    conn: &C,
10233    table_name: &str,
10234    schema: Option<&str>,
10235) -> Result<Option<SqlTableSchema>, IoError> {
10236    conn.table_schema(table_name, schema)
10237}
10238
10239/// List user-visible schemas exposed by the SQL backend.
10240///
10241/// Matches `SQLAlchemy.Inspector.get_schema_names()` shape. Single
10242/// namespace backends (SQLite) return an empty vector. Multi-schema
10243/// backends return the schemas the connection's role can see, with
10244/// internal/system schemas filtered out.
10245///
10246/// Per br-frankenpandas-lxhi (fd90.22).
10247pub fn list_sql_schemas<C: SqlConnection>(conn: &C) -> Result<Vec<String>, IoError> {
10248    conn.list_schemas()
10249}
10250
10251/// Reset a SQL table to empty without dropping its definition.
10252///
10253/// On backends that override the default (PostgreSQL, MySQL), this
10254/// uses `TRUNCATE TABLE` for a DDL-style fast-path reset. On backends
10255/// that inherit the default (SQLite), this emits `DELETE FROM <table>`,
10256/// which is universal but slower on large tables. Schema arg is
10257/// silently ignored when `supports_schemas() == false`.
10258///
10259/// Per br-frankenpandas-phum (fd90.23).
10260pub fn truncate_sql_table<C: SqlConnection>(
10261    conn: &C,
10262    table_name: &str,
10263    schema: Option<&str>,
10264) -> Result<(), IoError> {
10265    conn.truncate_table(table_name, schema)
10266}
10267
10268/// Probe the SQL backend's server version string.
10269///
10270/// Returns `Ok(None)` for backends that can't (or don't) introspect
10271/// their version. SQLite returns `Some("3.x.y")`. PostgreSQL/MySQL
10272/// impls return their respective `SHOW server_version` /
10273/// `SELECT VERSION()` payloads. Useful for dialect-version gating
10274/// (RETURNING, JSON ops, generated columns) and diagnostics.
10275///
10276/// Per br-frankenpandas-e23k (fd90.24).
10277pub fn sql_server_version<C: SqlConnection>(conn: &C) -> Result<Option<String>, IoError> {
10278    conn.server_version()
10279}
10280
10281/// Return the primary-key column names for a SQL table, ordered by
10282/// the table's primary-key ordinal.
10283///
10284/// Returns an empty vector when the table doesn't exist, has no
10285/// primary key, or the backend can't introspect column metadata.
10286/// Useful for upsert conflict-target generation and `index_label`
10287/// defaulting.
10288///
10289/// Per br-frankenpandas-uw3y (fd90.25).
10290pub fn sql_primary_key_columns<C: SqlConnection>(
10291    conn: &C,
10292    table_name: &str,
10293    schema: Option<&str>,
10294) -> Result<Vec<String>, IoError> {
10295    conn.primary_key_columns(table_name, schema)
10296}
10297
10298/// List user-defined indexes on a SQL table, optionally schema-scoped.
10299///
10300/// Matches `SQLAlchemy.Inspector.get_indexes()` shape. Returns an
10301/// empty vector when the table doesn't exist, has no user-created
10302/// indexes, or the backend can't introspect. Auto-created PRIMARY-KEY
10303/// indexes are filtered out (they're surfaced via primary_key_columns
10304/// instead).
10305///
10306/// Per br-frankenpandas-bgv9 (fd90.28).
10307pub fn list_sql_indexes<C: SqlConnection>(
10308    conn: &C,
10309    table_name: &str,
10310    schema: Option<&str>,
10311) -> Result<Vec<SqlIndexSchema>, IoError> {
10312    conn.list_indexes(table_name, schema)
10313}
10314
10315/// List user-visible view names known to the SQL backend.
10316///
10317/// Matches `SQLAlchemy.Inspector.get_view_names()` shape. Companion
10318/// to `list_sql_tables` — pandas/SQLAlchemy keep tables and views in
10319/// distinct buckets so `pd.read_sql_table` can distinguish them.
10320/// Schema arg is silently ignored when `supports_schemas() == false`.
10321///
10322/// Per br-frankenpandas-gm3r (fd90.30).
10323pub fn list_sql_views<C: SqlConnection>(
10324    conn: &C,
10325    schema: Option<&str>,
10326) -> Result<Vec<String>, IoError> {
10327    conn.list_views(schema)
10328}
10329
10330/// List foreign-key constraints declared on a SQL table, optionally
10331/// schema-scoped.
10332///
10333/// Matches `SQLAlchemy.Inspector.get_foreign_keys()` shape. Returns
10334/// an empty vector when the table has no FKs or the backend can't
10335/// introspect. Composite FKs are returned as a single entry with
10336/// paired `columns` / `referenced_columns` ordered by declaration
10337/// position. SQLite does not expose constraint names via PRAGMA, so
10338/// `constraint_name` is `None` there.
10339///
10340/// Per br-frankenpandas-uht8 (fd90.29).
10341pub fn list_sql_foreign_keys<C: SqlConnection>(
10342    conn: &C,
10343    table_name: &str,
10344    schema: Option<&str>,
10345) -> Result<Vec<SqlForeignKeySchema>, IoError> {
10346    conn.list_foreign_keys(table_name, schema)
10347}
10348
10349/// Probe the table-level comment for a SQL table, optionally
10350/// schema-scoped.
10351///
10352/// Matches `SQLAlchemy.Inspector.get_table_comment()` shape — returns
10353/// `Ok(Some(text))` when a comment exists, `Ok(None)` otherwise.
10354/// SQLite has no native table-comment storage and returns `None`.
10355/// PostgreSQL/MySQL/MSSQL impls override with their respective
10356/// catalog queries.
10357///
10358/// Per br-frankenpandas-yu3w (fd90.32).
10359pub fn sql_table_comment<C: SqlConnection>(
10360    conn: &C,
10361    table_name: &str,
10362    schema: Option<&str>,
10363) -> Result<Option<String>, IoError> {
10364    conn.table_comment(table_name, schema)
10365}
10366
10367/// List UNIQUE constraints declared on a SQL table.
10368///
10369/// Matches `SQLAlchemy.Inspector.get_unique_constraints()` shape.
10370/// Surfaces only inline `UNIQUE` declarations and `UNIQUE (...)`
10371/// table constraints. User-created `CREATE UNIQUE INDEX` indexes
10372/// remain in `list_sql_indexes` (with `unique == true`). The two
10373/// listings are intentionally disjoint to match SQLAlchemy.
10374///
10375/// Per br-frankenpandas-sh4v (fd90.31).
10376pub fn list_sql_unique_constraints<C: SqlConnection>(
10377    conn: &C,
10378    table_name: &str,
10379    schema: Option<&str>,
10380) -> Result<Vec<SqlUniqueConstraintSchema>, IoError> {
10381    conn.list_unique_constraints(table_name, schema)
10382}
10383
10384/// Maximum identifier length supported by the SQL backend, or `None`
10385/// when no documented limit exists.
10386///
10387/// Useful for to_sql validation: backends that override this report
10388/// their cap (PostgreSQL=63, MySQL=64, MSSQL=128) so auto-generated
10389/// index / constraint / column names can be truncated or rejected
10390/// before round-tripping through DDL that would silently truncate.
10391///
10392/// Per br-frankenpandas-cs81 (fd90.26).
10393pub fn sql_max_identifier_length<C: SqlConnection>(conn: &C) -> Option<usize> {
10394    conn.max_identifier_length()
10395}
10396
/// Backend capability summary exposed through `SqlInspector`.
///
/// The per-field values come from `SqlConnection` probes so concrete
/// backends can report their native ceilings without forcing callers to
/// branch on the connection type.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct SqlBackendCaps {
    /// Backend dialect name (`"sqlite"`, `"postgresql"`, etc.).
    pub dialect_name: &'static str,
    /// Server version as returned by the connection's `server_version`
    /// probe; may be `None`.
    pub server_version: Option<String>,
    /// Whether the backend supports native `INSERT ... RETURNING`.
    pub supports_returning: bool,
    /// Whether the backend exposes schema-qualified namespaces.
    pub supports_schemas: bool,
    /// Maximum bound parameters supported by the backend, if known.
    pub max_param_count: Option<usize>,
    /// Maximum identifier length, or `None` when no documented limit
    /// exists.
    pub max_identifier_length: Option<usize>,
}
10411
10412impl SqlBackendCaps {
10413    /// Maximum rows in one parameter-bound INSERT for `column_count`.
10414    ///
10415    /// Returns `None` when the backend has no known parameter ceiling, or
10416    /// when `column_count` is zero and therefore no parameter-derived row
10417    /// ceiling can be computed.
10418    #[must_use]
10419    pub fn max_insert_rows(&self, column_count: usize) -> Option<usize> {
10420        sql_max_insert_rows_for_columns(self.max_param_count, column_count)
10421    }
10422}
10423
10424/// Maximum bound parameters supported by the SQL backend, if known.
10425#[must_use]
10426pub fn sql_max_param_count<C: SqlConnection>(conn: &C) -> Option<usize> {
10427    conn.max_param_count()
10428}
10429
10430/// Whether the SQL backend supports native `INSERT ... RETURNING`.
10431#[must_use]
10432pub fn sql_supports_returning<C: SqlConnection>(conn: &C) -> bool {
10433    conn.supports_returning()
10434}
10435
10436/// Whether the SQL backend exposes schema-qualified namespaces.
10437#[must_use]
10438pub fn sql_supports_schemas<C: SqlConnection>(conn: &C) -> bool {
10439    conn.supports_schemas()
10440}
10441
10442/// Maximum INSERT rows for `column_count`, derived from the backend's
10443/// bound-parameter ceiling.
10444///
10445/// A return value of `Some(0)` means the requested column count exceeds
10446/// the backend's total bind-parameter cap.
10447#[must_use]
10448pub fn sql_max_insert_rows<C: SqlConnection>(conn: &C, column_count: usize) -> Option<usize> {
10449    sql_max_insert_rows_for_columns(conn.max_param_count(), column_count)
10450}
10451
/// Shared arithmetic behind the `max_insert_rows` helpers: how many rows
/// of `column_count` parameters fit under `max_param_count`.
///
/// Returns `None` when there is no parameter ceiling or when
/// `column_count` is zero; otherwise the integer quotient, which is
/// `Some(0)` when even a single row would not fit.
fn sql_max_insert_rows_for_columns(
    max_param_count: Option<usize>,
    column_count: usize,
) -> Option<usize> {
    // `checked_div` yields `None` for a zero divisor, which covers the
    // zero-column case without a separate guard.
    max_param_count.and_then(|ceiling| ceiling.checked_div(column_count))
}
10461
10462/// Gather the backend capability probes into one typed bundle.
10463pub fn sql_backend_caps<C: SqlConnection>(conn: &C) -> Result<SqlBackendCaps, IoError> {
10464    Ok(SqlBackendCaps {
10465        dialect_name: conn.dialect_name(),
10466        server_version: conn.server_version()?,
10467        supports_returning: conn.supports_returning(),
10468        supports_schemas: conn.supports_schemas(),
10469        max_param_count: conn.max_param_count(),
10470        max_identifier_length: conn.max_identifier_length(),
10471    })
10472}
10473
/// Backend-agnostic introspection facade matching the
/// `SQLAlchemy.Inspector` shape.
///
/// Per br-frankenpandas-szs9 (fd90.38). Wraps a `&C: SqlConnection`
/// and exposes the full fd90.20-37 introspection surface as methods
/// on a single bundle so callers don't have to remember which
/// `list_sql_*` / `sql_*` free-fn to use. Pure delegation — no new
/// behavior, just API ergonomics.
///
/// The inspector holds nothing but the borrowed connection, so it is
/// valid only for the lifetime `'a` of that borrow.
///
/// ```rust,ignore
/// use frankenpandas::SqlInspector;
/// let inspector = SqlInspector::new(&conn);
/// for table in inspector.tables(None)? {
///     for col in inspector.columns(&table, None)?
///         .map(|s| s.columns)
///         .unwrap_or_default()
///     {
///         println!("{}: {:?}", col.name, col.declared_type);
///     }
/// }
/// ```
#[derive(Debug)]
pub struct SqlInspector<'a, C: SqlConnection> {
    /// Borrowed connection that every inspector method delegates to.
    conn: &'a C,
}
10499
10500impl<'a, C: SqlConnection> SqlInspector<'a, C> {
10501    /// Create a new inspector bound to the given connection.
10502    #[must_use]
10503    pub fn new(conn: &'a C) -> Self {
10504        Self { conn }
10505    }
10506
10507    /// List user-visible table names. See `list_sql_tables`.
10508    pub fn tables(&self, schema: Option<&str>) -> Result<Vec<String>, IoError> {
10509        self.conn.list_tables(schema)
10510    }
10511
10512    /// List user-visible view names. See `list_sql_views`.
10513    pub fn views(&self, schema: Option<&str>) -> Result<Vec<String>, IoError> {
10514        self.conn.list_views(schema)
10515    }
10516
10517    /// List user-visible schemas. See `list_sql_schemas`.
10518    pub fn schemas(&self) -> Result<Vec<String>, IoError> {
10519        self.conn.list_schemas()
10520    }
10521
10522    /// Introspect a table's columns. See `sql_table_schema`.
10523    pub fn columns(
10524        &self,
10525        table_name: &str,
10526        schema: Option<&str>,
10527    ) -> Result<Option<SqlTableSchema>, IoError> {
10528        self.conn.table_schema(table_name, schema)
10529    }
10530
10531    /// List user-defined indexes. See `list_sql_indexes`.
10532    pub fn indexes(
10533        &self,
10534        table_name: &str,
10535        schema: Option<&str>,
10536    ) -> Result<Vec<SqlIndexSchema>, IoError> {
10537        self.conn.list_indexes(table_name, schema)
10538    }
10539
10540    /// List foreign-key constraints. See `list_sql_foreign_keys`.
10541    pub fn foreign_keys(
10542        &self,
10543        table_name: &str,
10544        schema: Option<&str>,
10545    ) -> Result<Vec<SqlForeignKeySchema>, IoError> {
10546        self.conn.list_foreign_keys(table_name, schema)
10547    }
10548
10549    /// List UNIQUE constraints. See `list_sql_unique_constraints`.
10550    pub fn unique_constraints(
10551        &self,
10552        table_name: &str,
10553        schema: Option<&str>,
10554    ) -> Result<Vec<SqlUniqueConstraintSchema>, IoError> {
10555        self.conn.list_unique_constraints(table_name, schema)
10556    }
10557
10558    /// Return primary-key columns sorted by ordinal.
10559    /// See `sql_primary_key_columns`.
10560    pub fn primary_key_columns(
10561        &self,
10562        table_name: &str,
10563        schema: Option<&str>,
10564    ) -> Result<Vec<String>, IoError> {
10565        self.conn.primary_key_columns(table_name, schema)
10566    }
10567
10568    /// Probe the table-level comment. See `sql_table_comment`.
10569    pub fn table_comment(
10570        &self,
10571        table_name: &str,
10572        schema: Option<&str>,
10573    ) -> Result<Option<String>, IoError> {
10574        self.conn.table_comment(table_name, schema)
10575    }
10576
10577    /// Schema-aware existence check. Routes to
10578    /// `SqlConnection::table_exists_in_schema`.
10579    pub fn table_exists(&self, table_name: &str, schema: Option<&str>) -> Result<bool, IoError> {
10580        self.conn.table_exists_in_schema(table_name, schema)
10581    }
10582
10583    /// Probe the backend server version. See `sql_server_version`.
10584    pub fn server_version(&self) -> Result<Option<String>, IoError> {
10585        self.conn.server_version()
10586    }
10587
10588    /// Maximum identifier length, when the backend exposes one.
10589    /// See `sql_max_identifier_length`.
10590    #[must_use]
10591    pub fn max_identifier_length(&self) -> Option<usize> {
10592        self.conn.max_identifier_length()
10593    }
10594
10595    /// Maximum bound parameters supported by this backend, if known.
10596    #[must_use]
10597    pub fn max_param_count(&self) -> Option<usize> {
10598        self.conn.max_param_count()
10599    }
10600
10601    /// Maximum INSERT rows for `column_count`, derived from the backend's
10602    /// bound-parameter ceiling.
10603    #[must_use]
10604    pub fn max_insert_rows(&self, column_count: usize) -> Option<usize> {
10605        sql_max_insert_rows_for_columns(self.conn.max_param_count(), column_count)
10606    }
10607
10608    /// Whether this backend supports native `INSERT ... RETURNING`.
10609    #[must_use]
10610    pub fn supports_returning(&self) -> bool {
10611        self.conn.supports_returning()
10612    }
10613
10614    /// Whether this backend exposes schema-qualified namespaces.
10615    #[must_use]
10616    pub fn supports_schemas(&self) -> bool {
10617        self.conn.supports_schemas()
10618    }
10619
10620    /// Gather backend capability probes into one typed bundle.
10621    pub fn backend_caps(&self) -> Result<SqlBackendCaps, IoError> {
10622        sql_backend_caps(self.conn)
10623    }
10624
10625    /// Backend dialect name (`"sqlite"`, `"postgresql"`, etc.).
10626    #[must_use]
10627    pub fn dialect_name(&self) -> &'static str {
10628        self.conn.dialect_name()
10629    }
10630
10631    /// Check whether a specific column exists on a table.
10632    ///
10633    /// Per br-frankenpandas-ppry (fd90.39). Returns `Ok(false)` when
10634    /// the table doesn't exist (i.e. `columns` returns `None`), or
10635    /// when the table exists but has no column matching `column_name`.
10636    /// Returns `Ok(true)` only when the named column is present.
10637    /// Mirrors `SQLAlchemy.Inspector.has_column()` semantics.
10638    pub fn has_column(
10639        &self,
10640        table_name: &str,
10641        column_name: &str,
10642        schema: Option<&str>,
10643    ) -> Result<bool, IoError> {
10644        let Some(meta) = self.conn.table_schema(table_name, schema)? else {
10645            return Ok(false);
10646        };
10647        Ok(meta.column(column_name).is_some())
10648    }
10649
10650    /// Look up the metadata bundle for a single column.
10651    ///
10652    /// Per br-frankenpandas-ppry (fd90.39). Returns `Ok(None)` when the
10653    /// table doesn't exist or the column isn't present. The returned
10654    /// `SqlColumnSchema` carries the full set of fields populated by
10655    /// the underlying `table_schema` impl (declared_type, nullable,
10656    /// default_value, primary_key_ordinal, comment, autoincrement).
10657    pub fn column(
10658        &self,
10659        table_name: &str,
10660        column_name: &str,
10661        schema: Option<&str>,
10662    ) -> Result<Option<SqlColumnSchema>, IoError> {
10663        let Some(meta) = self.conn.table_schema(table_name, schema)? else {
10664            return Ok(None);
10665        };
10666        Ok(meta.column(column_name).cloned())
10667    }
10668
10669    /// Reflect a full table's metadata in one call: columns, primary
10670    /// key, indexes, foreign keys, unique constraints, and comment.
10671    ///
10672    /// Per br-frankenpandas-76mw (fd90.40). Mirrors
10673    /// `SQLAlchemy.MetaData.reflect_table` shape — gives callers a
10674    /// single bundle instead of 5 separate fetches. Returns `Ok(None)`
10675    /// when the table doesn't exist (matched via `table_schema`
10676    /// returning `None`); otherwise all derived calls run and any
10677    /// missing pieces (e.g. SQLite's always-None `table_comment`)
10678    /// are simply preserved as their natural empty values.
10679    ///
10680    /// Per br-frankenpandas-2kzv (fd90.43): primary-key columns are
10681    /// derived directly from the `SqlTableSchema` we already fetched
10682    /// rather than dispatching `primary_key_columns()` again — that
10683    /// trait method's default impl calls `table_schema()` internally,
10684    /// which would double the round-trip count for backends where
10685    /// each call is a real network hop.
10686    pub fn reflect_table(
10687        &self,
10688        table_name: &str,
10689        schema: Option<&str>,
10690    ) -> Result<Option<SqlReflectedTable>, IoError> {
10691        let Some(meta) = self.conn.table_schema(table_name, schema)? else {
10692            return Ok(None);
10693        };
10694        let primary_key_columns = primary_keys_from_schema(&meta);
10695        let indexes = self.conn.list_indexes(table_name, schema)?;
10696        let foreign_keys = self.conn.list_foreign_keys(table_name, schema)?;
10697        let unique_constraints = self.conn.list_unique_constraints(table_name, schema)?;
10698        let comment = self.conn.table_comment(table_name, schema)?;
10699        Ok(Some(SqlReflectedTable {
10700            table_name: meta.table_name,
10701            columns: meta.columns,
10702            primary_key_columns,
10703            indexes,
10704            foreign_keys,
10705            unique_constraints,
10706            comment,
10707        }))
10708    }
10709
10710    /// Reflect every user-visible table in `schema` into a vector of
10711    /// bundles, one per table.
10712    ///
10713    /// Per br-frankenpandas-jmmo (fd90.53). Iterates `tables(schema)`
10714    /// then calls `reflect_table` on each. Skips any table that
10715    /// `reflect_table` returns `Ok(None)` for — covers the race
10716    /// condition where a table existed at list time but not at
10717    /// reflect time (e.g. concurrent DROP). Useful for whole-database
10718    /// introspection in one call.
10719    pub fn reflect_all_tables(
10720        &self,
10721        schema: Option<&str>,
10722    ) -> Result<Vec<SqlReflectedTable>, IoError> {
10723        let table_names = self.conn.list_tables(schema)?;
10724        let mut bundles = Vec::with_capacity(table_names.len());
10725        for name in table_names {
10726            if let Some(bundle) = self.reflect_table(&name, schema)? {
10727                bundles.push(bundle);
10728            }
10729        }
10730        Ok(bundles)
10731    }
10732
10733    /// Reflect every user-visible view in `schema` into a vector of
10734    /// bundles, one per view.
10735    ///
10736    /// Per br-frankenpandas-zuqt (fd90.54). View-side parity with
10737    /// `reflect_all_tables`: iterates `views(schema)` then calls
10738    /// `reflect_table` on each (PRAGMA table_info works on views too,
10739    /// returning the view's column shape). PK/FK/UC/index lists in
10740    /// the bundle will typically be empty for views since views don't
10741    /// carry constraints — only the column metadata + comment are
10742    /// meaningful. Same disappearing-entity skip semantics as
10743    /// `reflect_all_tables`.
10744    pub fn reflect_all_views(
10745        &self,
10746        schema: Option<&str>,
10747    ) -> Result<Vec<SqlReflectedTable>, IoError> {
10748        let view_names = self.conn.list_views(schema)?;
10749        let mut bundles = Vec::with_capacity(view_names.len());
10750        for name in view_names {
10751            if let Some(bundle) = self.reflect_table(&name, schema)? {
10752                bundles.push(bundle);
10753            }
10754        }
10755        Ok(bundles)
10756    }
10757}
10758
10759/// Derive the primary-key column names from an already-fetched
10760/// `SqlTableSchema`, sorted ascending by `primary_key_ordinal`.
10761///
10762/// Per br-frankenpandas-2kzv (fd90.43) / fd90.47: this is the
10763/// canonical filter+sort impl shared by both
10764/// `SqlConnection::primary_key_columns` (the trait default) and
10765/// `SqlInspector::reflect_table` (which uses already-fetched
10766/// metadata to avoid a redundant `table_schema()` round-trip).
10767fn primary_keys_from_schema(meta: &SqlTableSchema) -> Vec<String> {
10768    let mut pk: Vec<(usize, String)> = meta
10769        .columns
10770        .iter()
10771        .filter_map(|c| c.primary_key_ordinal.map(|ord| (ord, c.name.clone())))
10772        .collect();
10773    pk.sort_by_key(|(ord, _)| *ord);
10774    pk.into_iter().map(|(_, name)| name).collect()
10775}
10776
10777/// Convenience constructor for `SqlInspector`.
10778///
10779/// `let inspector = inspect(&conn);` reads more naturally than
10780/// `SqlInspector::new(&conn)` for one-shot uses. Per br-frankenpandas-szs9 (fd90.38).
10781#[must_use]
10782pub fn inspect<C: SqlConnection>(conn: &C) -> SqlInspector<'_, C> {
10783    SqlInspector::new(conn)
10784}
10785
10786/// Read an entire SQL table into a DataFrame with read-time options.
10787///
10788/// Matches the supported subset of
10789/// `pd.read_sql_table(table_name, con, parse_dates=..., coerce_float=...)`.
10790pub fn read_sql_table_with_options<C: SqlConnection>(
10791    conn: &C,
10792    table_name: &str,
10793    options: &SqlReadOptions,
10794) -> Result<DataFrame, IoError> {
10795    let query =
10796        sql_table_read_query_for_options(conn, table_name, options, options.index_col.as_deref())?;
10797    // Per br-frankenpandas-t1777: the query reader rejects options.columns
10798    // (the SELECT is already projected here). Clear before delegating so
10799    // the table reader stays the canonical honor-columns entrypoint
10800    // regardless of which slot the caller used.
10801    let cleared = SqlReadOptions {
10802        columns: None,
10803        ..options.clone()
10804    };
10805    read_sql_with_options(conn, &query, &cleared)
10806}
10807
10808fn sql_table_read_query_for_options<C: SqlConnection>(
10809    conn: &C,
10810    table_name: &str,
10811    options: &SqlReadOptions,
10812    required_projection_col: Option<&str>,
10813) -> Result<String, IoError> {
10814    // Per br-frankenpandas-d3e9 (fd90.34): when options.columns is
10815    // Some(list), project only those columns instead of SELECT *.
10816    // Per br-frankenpandas-fd90.76: if an index_col will be promoted
10817    // after materialization, include it in the generated projection even
10818    // when the user did not list it in columns. pandas SQLTable.read does
10819    // this before set_index so columns=[...] and index_col=... compose.
10820    match options.columns.as_deref() {
10821        Some(cols) => {
10822            let mut refs: Vec<&str> = Vec::with_capacity(cols.len() + 1);
10823            if let Some(index_col) = required_projection_col
10824                && !cols.iter().any(|name| name == index_col)
10825            {
10826                refs.push(index_col);
10827            }
10828            refs.extend(cols.iter().map(String::as_str));
10829            sql_select_columns_query_in_schema(conn, table_name, options.schema.as_deref(), &refs)
10830        }
10831        None => sql_select_all_query_in_schema(conn, table_name, options.schema.as_deref()),
10832    }
10833}
10834
10835/// Read an entire SQL table with read-time options and optional index promotion.
10836///
10837/// Matches the supported subset of
10838/// `pd.read_sql_table(table_name, con, parse_dates=..., coerce_float=..., index_col=...)`.
10839pub fn read_sql_table_with_options_and_index_col<C: SqlConnection>(
10840    conn: &C,
10841    table_name: &str,
10842    options: &SqlReadOptions,
10843    index_col: Option<&str>,
10844) -> Result<DataFrame, IoError> {
10845    // Per br-frankenpandas-c1h9 (fd90.36): explicit `index_col` arg
10846    // always wins over `options.index_col`. Avoid double-promotion by
10847    // clearing the option-struct copy when the explicit arg is set.
10848    if let Some(col_name) = index_col {
10849        // Build the SELECT projection from the ORIGINAL options (so
10850        // options.columns is honored when present). Per br-frankenpandas-t1777,
10851        // also strip options.columns before passing to the query reader
10852        // (which now rejects the field; the SELECT already projects the
10853        // columns we wanted).
10854        let query = sql_table_read_query_for_options(conn, table_name, options, Some(col_name))?;
10855        let cleared = SqlReadOptions {
10856            index_col: None,
10857            columns: None,
10858            ..options.clone()
10859        };
10860        let frame = read_sql_with_options(conn, &query, &cleared)?;
10861        return apply_sql_index_col(frame, Some(col_name));
10862    }
10863    read_sql_table_with_options(conn, table_name, options)
10864}
10865
10866/// Read an entire SQL table as an iterator of DataFrame chunks.
10867///
10868/// Matches the supported subset of `pd.read_sql_table(table_name, con, chunksize=...)`.
10869pub fn read_sql_table_chunks<'conn, C: SqlConnection + 'conn>(
10870    conn: &'conn C,
10871    table_name: &str,
10872    chunk_size: usize,
10873) -> Result<SqlChunkIterator<'conn>, IoError> {
10874    read_sql_chunks(conn, &sql_select_all_query(conn, table_name)?, chunk_size)
10875}
10876
10877/// Read an entire SQL table as DataFrame chunks with read-time options.
10878///
10879/// Matches the supported subset of
10880/// `pd.read_sql_table(table_name, con, parse_dates=..., coerce_float=..., chunksize=...)`.
10881pub fn read_sql_table_chunks_with_options<'conn, C: SqlConnection + 'conn>(
10882    conn: &'conn C,
10883    table_name: &str,
10884    options: &SqlReadOptions,
10885    chunk_size: usize,
10886) -> Result<SqlChunkIterator<'conn>, IoError> {
10887    // Per br-frankenpandas-i8kja: this entrypoint returns the
10888    // un-indexed SqlChunkIterator. Honoring options.index_col would
10889    // silently diverge from the full-frame read_sql_table_with_options
10890    // sibling (which does promote). Reject to surface the mismatch —
10891    // callers should use read_sql_table_chunks_with_options_and_index_col
10892    // when index_col is set.
10893    if options.index_col.is_some() {
10894        return Err(IoError::Sql(
10895            "options.index_col is set but this entrypoint returns SqlChunkIterator without \
10896             index promotion; use read_sql_table_chunks_with_options_and_index_col to honor \
10897             index_col"
10898                .to_owned(),
10899        ));
10900    }
10901    let query = match options.columns.as_deref() {
10902        Some(cols) => {
10903            let refs: Vec<&str> = cols.iter().map(String::as_str).collect();
10904            sql_select_columns_query_in_schema(conn, table_name, options.schema.as_deref(), &refs)?
10905        }
10906        None => sql_select_all_query_in_schema(conn, table_name, options.schema.as_deref())?,
10907    };
10908    // Per br-frankenpandas-t1777: query reader rejects options.columns
10909    // (the SELECT is already projected here). Clear before delegating.
10910    let cleared = SqlReadOptions {
10911        columns: None,
10912        ..options.clone()
10913    };
10914    read_sql_chunks_with_options(conn, &query, &cleared, chunk_size)
10915}
10916
10917/// Read an entire SQL table as chunks with read-time options and optional index promotion.
10918///
10919/// Matches the supported subset of
10920/// `pd.read_sql_table(table_name, con, parse_dates=..., coerce_float=..., index_col=..., chunksize=...)`.
10921pub fn read_sql_table_chunks_with_options_and_index_col<'conn, C: SqlConnection + 'conn>(
10922    conn: &'conn C,
10923    table_name: &str,
10924    options: &SqlReadOptions,
10925    index_col: Option<&str>,
10926    chunk_size: usize,
10927) -> Result<SqlIndexedChunkIterator<'conn>, IoError> {
10928    let effective_index_col = index_col.or(options.index_col.as_deref());
10929    let query = sql_table_read_query_for_options(conn, table_name, options, effective_index_col)?;
10930    // The plain chunk reader rejects options.index_col (see i8kja) and
10931    // options.columns (see t1777); clear both before delegating so
10932    // chunked-with-options remains a sibling of the full-frame path
10933    // regardless of which slots the caller populated.
10934    let cleared = SqlReadOptions {
10935        index_col: None,
10936        columns: None,
10937        ..options.clone()
10938    };
10939    let inner = read_sql_chunks_with_options(conn, &query, &cleared, chunk_size)?;
10940    sql_indexed_chunks(inner, effective_index_col)
10941}
10942
10943/// Read an entire SQL table as chunks with one column promoted to each chunk's index.
10944///
10945/// Matches the supported subset of
10946/// `pd.read_sql_table(table_name, con, index_col=..., chunksize=...)`.
10947pub fn read_sql_table_chunks_with_index_col<'conn, C: SqlConnection + 'conn>(
10948    conn: &'conn C,
10949    table_name: &str,
10950    index_col: Option<&str>,
10951    chunk_size: usize,
10952) -> Result<SqlIndexedChunkIterator<'conn>, IoError> {
10953    let inner = read_sql_table_chunks(conn, table_name, chunk_size)?;
10954    sql_indexed_chunks(inner, index_col)
10955}
10956
10957/// Read a subset of columns from a SQL table.
10958///
10959/// Matches `pd.read_sql_table(table, con, columns=[...])`. The named
10960/// columns are emitted in the requested order. Each column name must
10961/// satisfy the same alphanumeric+underscore rule as `table_name` to
10962/// keep the projection injection-safe; mismatched names return
10963/// `IoError::Sql`. Empty `columns` is rejected (pandas raises in
10964/// the same case rather than producing an empty SELECT).
10965pub fn read_sql_table_columns<C: SqlConnection>(
10966    conn: &C,
10967    table_name: &str,
10968    columns: &[&str],
10969) -> Result<DataFrame, IoError> {
10970    read_sql(conn, &sql_select_columns_query(conn, table_name, columns)?)
10971}
10972
10973/// Read a subset of columns from a SQL table with optional index promotion.
10974///
10975/// Matches the supported subset of
10976/// `pd.read_sql_table(table, con, columns=[...], index_col=...)`. When
10977/// `index_col` is set and is not already in `columns`, it is auto-projected
10978/// into the underlying SELECT (matching pandas SQLTable.read which does the
10979/// same before set_index). The promoted column is removed from the data
10980/// columns after projection. Per br-frankenpandas-6n0uz.
10981pub fn read_sql_table_columns_with_index_col<C: SqlConnection>(
10982    conn: &C,
10983    table_name: &str,
10984    columns: &[&str],
10985    index_col: Option<&str>,
10986) -> Result<DataFrame, IoError> {
10987    let projection = projection_with_index_col(columns, index_col)?;
10988    let frame = read_sql_table_columns(conn, table_name, &projection)?;
10989    apply_sql_index_col(frame, index_col)
10990}
10991
10992/// Read a subset of columns from a SQL table as DataFrame chunks.
10993///
10994/// Matches the supported subset of
10995/// `pd.read_sql_table(table, con, columns=[...], chunksize=...)`. The named
10996/// columns are emitted in the requested order and each chunk receives a fresh
10997/// zero-based RangeIndex.
10998pub fn read_sql_table_columns_chunks<'conn, C: SqlConnection + 'conn>(
10999    conn: &'conn C,
11000    table_name: &str,
11001    columns: &[&str],
11002    chunk_size: usize,
11003) -> Result<SqlChunkIterator<'conn>, IoError> {
11004    read_sql_chunks(
11005        conn,
11006        &sql_select_columns_query(conn, table_name, columns)?,
11007        chunk_size,
11008    )
11009}
11010
11011/// Read a subset of columns from a SQL table as chunks with optional index promotion.
11012///
11013/// Matches the supported subset of
11014/// `pd.read_sql_table(table, con, columns=[...], index_col=..., chunksize=...)`.
11015/// When `index_col` is set and is not already in `columns`, it is auto-projected
11016/// into the underlying SELECT (matching pandas SQLTable.read). The promoted
11017/// column is removed from each chunk after projection. Per br-frankenpandas-6n0uz.
11018pub fn read_sql_table_columns_chunks_with_index_col<'conn, C: SqlConnection + 'conn>(
11019    conn: &'conn C,
11020    table_name: &str,
11021    columns: &[&str],
11022    index_col: Option<&str>,
11023    chunk_size: usize,
11024) -> Result<SqlIndexedChunkIterator<'conn>, IoError> {
11025    let projection = projection_with_index_col(columns, index_col)?;
11026    let inner = read_sql_table_columns_chunks(conn, table_name, &projection, chunk_size)?;
11027    sql_indexed_chunks(inner, index_col)
11028}
11029
11030/// Per br-frankenpandas-6n0uz: helper that prepends `index_col` to a
11031/// `columns` projection list if it isn't already present. Mirrors the
11032/// inline logic in `sql_table_read_query_for_options` (fd90.76) so the
11033/// columns-list and options-based read paths agree on the auto-include
11034/// rule while preserving the public `index_col=""` and empty-projection
11035/// error contracts.
11036fn projection_with_index_col<'a>(
11037    columns: &'a [&'a str],
11038    index_col: Option<&'a str>,
11039) -> Result<Vec<&'a str>, IoError> {
11040    match index_col {
11041        Some("") => Err(IoError::Sql(
11042            "index_col: empty string is not a valid column name".to_owned(),
11043        )),
11044        Some(name) if !columns.is_empty() && !columns.contains(&name) => {
11045            let mut out = Vec::with_capacity(columns.len() + 1);
11046            out.push(name);
11047            out.extend_from_slice(columns);
11048            Ok(out)
11049        }
11050        _ => Ok(columns.to_vec()),
11051    }
11052}
11053
11054/// Write a DataFrame to a SQL table.
11055///
11056/// Matches `pd.DataFrame.to_sql(name, con)`.
11057pub fn write_sql<C: SqlConnection>(
11058    frame: &DataFrame,
11059    conn: &C,
11060    table_name: &str,
11061    if_exists: SqlIfExists,
11062) -> Result<(), IoError> {
11063    write_sql_with_options(
11064        frame,
11065        conn,
11066        table_name,
11067        &SqlWriteOptions {
11068            if_exists,
11069            index: false,
11070            index_label: None,
11071            schema: None,
11072            dtype: None,
11073            method: SqlInsertMethod::Single,
11074            chunksize: None,
11075        },
11076    )
11077}
11078
11079/// Write a DataFrame to a SQLite table with pandas-style index options.
11080///
11081/// Matches the supported subset of
11082/// `pd.DataFrame.to_sql(name, con, index=..., index_label=...)`.
11083pub fn write_sql_with_options<C: SqlConnection>(
11084    frame: &DataFrame,
11085    conn: &C,
11086    table_name: &str,
11087    options: &SqlWriteOptions,
11088) -> Result<(), IoError> {
11089    // Validate table name to prevent SQL injection (only allow alphanumeric + underscore, non-empty).
11090    if table_name.is_empty() || !table_name.chars().all(|c| c.is_alphanumeric() || c == '_') {
11091        return Err(IoError::Sql(format!(
11092            "invalid table name: '{table_name}' (must be non-empty, only alphanumeric and underscore allowed)"
11093        )));
11094    }
11095
11096    let col_names: Vec<String> = frame.column_names().into_iter().cloned().collect();
11097    let index_label = resolve_sql_index_label(frame, options)?;
11098    let mut sql_col_names =
11099        Vec::with_capacity(col_names.len() + usize::from(index_label.is_some()));
11100    if let Some(ref label) = index_label {
11101        sql_col_names.push(label.clone());
11102    }
11103    sql_col_names.extend(col_names.iter().cloned());
11104
11105    // Per fd90.27: when the backend reports an identifier-length cap
11106    // (PG=63, MySQL=64, MSSQL=128), reject any identifier that exceeds
11107    // it before emitting DDL. SQLite (None) is unaffected.
11108    let max_ident = conn.max_identifier_length();
11109    validate_sql_identifier_length(table_name, max_ident, "table")?;
11110    if let Some(ref label) = index_label {
11111        validate_sql_identifier_length(label, max_ident, "index label")?;
11112    }
11113    for name in &col_names {
11114        validate_sql_identifier_length(name, max_ident, "column")?;
11115    }
11116    if let Some(s) = options.schema.as_deref() {
11117        validate_sql_identifier_length(s, max_ident, "schema")?;
11118    }
11119
11120    // Handle if_exists policy.
11121    let schema = options.schema.as_deref();
11122    match options.if_exists {
11123        SqlIfExists::Fail => {
11124            let exists = conn.table_exists_in_schema(table_name, schema)?;
11125            if exists {
11126                return Err(IoError::Sql(format!("table '{table_name}' already exists")));
11127            }
11128        }
11129        SqlIfExists::Replace => {
11130            let drop_sql = sql_drop_table_query_in_schema(conn, table_name, schema)?;
11131            conn.execute_batch(&drop_sql)?;
11132        }
11133        SqlIfExists::Append => {
11134            // Table may or may not exist; CREATE TABLE IF NOT EXISTS handles both.
11135        }
11136    }
11137
11138    // Build CREATE TABLE statement.
11139    let mut col_defs = Vec::with_capacity(sql_col_names.len());
11140    if let Some(ref label) = index_label {
11141        col_defs.push(sql_column_definition(
11142            conn,
11143            label,
11144            conn.index_dtype_sql(frame.index()),
11145        )?);
11146    }
11147    let dtype_overrides = options.dtype.as_ref();
11148    col_defs.extend(
11149        col_names
11150            .iter()
11151            .map(|name| {
11152                // Per br-frankenpandas-ev2s (fd90.18): explicit per-column
11153                // SQL-type override wins over the inferred conn.dtype_sql.
11154                let override_sql = dtype_overrides
11155                    .and_then(|m| m.get(name))
11156                    .map(|s| s.as_str());
11157                let sql_type = match override_sql {
11158                    Some(s) => s,
11159                    None => {
11160                        let dt = frame.column(name).map_or(DType::Utf8, |c| c.dtype());
11161                        conn.dtype_sql(dt)
11162                    }
11163                };
11164                sql_column_definition(conn, name, sql_type)
11165            })
11166            .collect::<Result<Vec<_>, IoError>>()?,
11167    );
11168
11169    let create_sql = sql_create_table_query_in_schema(conn, table_name, schema, &col_defs)?;
11170    conn.execute_batch(&create_sql)?;
11171
11172    let nrows = frame.index().len();
11173    let ncols = sql_col_names.len();
11174    let mut rows = Vec::with_capacity(nrows);
11175    for row_idx in 0..nrows {
11176        let mut row = Vec::with_capacity(ncols);
11177        if options.index {
11178            row.push(scalar_from_index_label(&frame.index().labels()[row_idx]));
11179        }
11180        row.extend(col_names.iter().map(|name| {
11181            frame
11182                .column(name)
11183                .and_then(|col| col.value(row_idx))
11184                .cloned()
11185                .unwrap_or(Scalar::Null(NullKind::Null))
11186        }));
11187        rows.push(row);
11188    }
11189
11190    if rows.is_empty() {
11191        // Empty frame: still emit CREATE TABLE (already done) but skip INSERT.
11192        return Ok(());
11193    }
11194
11195    // Per fd90.33: pandas-style chunksize. None preserves prior
11196    // single-transaction semantics; Some(0) is rejected (matches pandas).
11197    if let Some(0) = options.chunksize {
11198        return Err(IoError::Sql(
11199            "invalid chunksize: 0 (must be > 0 if Some)".to_owned(),
11200        ));
11201    }
11202
11203    match options.method {
11204        SqlInsertMethod::Single => {
11205            let insert_sql =
11206                sql_insert_rows_query_in_schema(conn, table_name, schema, &sql_col_names)?;
11207            match options.chunksize {
11208                None => {
11209                    conn.insert_rows(&insert_sql, &rows)?;
11210                }
11211                Some(n) => {
11212                    for chunk in rows.chunks(n) {
11213                        conn.insert_rows(&insert_sql, chunk)?;
11214                    }
11215                }
11216            }
11217        }
11218        SqlInsertMethod::Multi => {
11219            // Per fd90.19: chunk rows to fit `max_param_count`. When the
11220            // backend reports None, send the whole frame in one statement.
11221            // Per fd90.33: when chunksize is also Some(n), the effective
11222            // chunk row count is min(n, max_param_count / num_cols).
11223            let param_chunk = match conn.max_param_count() {
11224                Some(max) if ncols > 0 => {
11225                    let per_chunk = max / ncols;
11226                    if per_chunk == 0 {
11227                        return Err(IoError::Sql(format!(
11228                            "multi-row insert: ncols={ncols} exceeds backend max_param_count={max}"
11229                        )));
11230                    }
11231                    per_chunk
11232                }
11233                _ => rows.len(),
11234            };
11235            let chunk_rows = options
11236                .chunksize
11237                .map(|cs| cs.min(param_chunk))
11238                .unwrap_or(param_chunk);
11239            for chunk in rows.chunks(chunk_rows) {
11240                let chunk_sql = sql_multi_row_insert_query_in_schema(
11241                    conn,
11242                    table_name,
11243                    schema,
11244                    &sql_col_names,
11245                    chunk.len(),
11246                )?;
11247                let mut flat = Vec::with_capacity(chunk.len() * ncols);
11248                for row in chunk {
11249                    flat.extend(row.iter().cloned());
11250                }
11251                conn.insert_rows(&chunk_sql, &[flat])?;
11252            }
11253        }
11254    }
11255
11256    Ok(())
11257}
11258
11259// ── Extension trait for DataFrame IO convenience methods ─────────────
11260
11261/// Extension trait that adds IO convenience methods to `DataFrame`.
11262///
11263/// Import this trait to call `df.to_parquet(path)`, `df.to_orc(path)`,
11264/// `df.to_hdf(path)`, `df.to_parquet_bytes()`, etc. directly on DataFrame
11265/// values.
11266pub trait DataFrameIoExt {
11267    /// Write this DataFrame to a Parquet file.
11268    ///
11269    /// Matches `pd.DataFrame.to_parquet(path)`.
11270    fn to_parquet(&self, path: &Path) -> Result<(), IoError>;
11271
11272    /// Serialize this DataFrame to Parquet bytes in memory.
11273    ///
11274    /// Matches `pd.DataFrame.to_parquet()` with no path (returns bytes).
11275    fn to_parquet_bytes(&self) -> Result<Vec<u8>, IoError>;
11276
11277    /// Write this DataFrame to an ORC file.
11278    ///
11279    /// Matches the scoped `DataFrame.to_orc(path)` compatibility surface.
11280    fn to_orc(&self, path: &Path) -> Result<(), IoError>;
11281
11282    /// Write this DataFrame to an ORC file.
11283    ///
11284    /// Explicit file-suffixed form of [`DataFrameIoExt::to_orc`].
11285    fn to_orc_file(&self, path: &Path) -> Result<(), IoError>;
11286
11287    /// Serialize this DataFrame to ORC bytes in memory.
11288    fn to_orc_bytes(&self) -> Result<Vec<u8>, IoError>;
11289
11290    /// Write this DataFrame to an HDF5 file at the default key.
11291    ///
11292    /// Matches the scoped `DataFrame.to_hdf(path)` compatibility surface.
11293    fn to_hdf(&self, path: &Path) -> Result<(), IoError>;
11294
11295    /// Write this DataFrame to an HDF5 file at the default key.
11296    ///
11297    /// Explicit file-suffixed form of [`DataFrameIoExt::to_hdf`].
11298    fn to_hdf_file(&self, path: &Path) -> Result<(), IoError>;
11299
11300    /// Write this DataFrame to an HDF5 file at an explicit key.
11301    fn to_hdf_key(&self, path: &Path, key: &str) -> Result<(), IoError>;
11302
11303    /// Write this DataFrame to an HDF5 file with explicit options.
11304    fn to_hdf_with_options(&self, path: &Path, options: &HdfWriteOptions) -> Result<(), IoError>;
11305
11306    /// Write this DataFrame to a CSV file.
11307    ///
11308    /// Matches `pd.DataFrame.to_csv(path)`.
11309    fn to_csv_file(&self, path: &Path) -> Result<(), IoError>;
11310
11311    /// Serialize this DataFrame to a CSV string.
11312    ///
11313    /// Matches `pd.DataFrame.to_csv()` with no path.
11314    fn to_csv_string(&self) -> Result<String, IoError>;
11315
11316    /// Serialize this DataFrame to a CSV string with explicit write options.
11317    ///
11318    /// Matches `pd.DataFrame.to_csv(sep, na_rep, header, index, index_label)`.
11319    fn to_csv_string_with_options(&self, options: &CsvWriteOptions) -> Result<String, IoError>;
11320
11321    /// Serialize this DataFrame to a Markdown table string.
11322    ///
11323    /// Matches `pd.DataFrame.to_markdown()` with no buffer.
11324    fn to_markdown_string(&self) -> Result<String, IoError>;
11325
11326    /// Serialize this DataFrame to a Markdown table string with explicit options.
11327    fn to_markdown_string_with_options(
11328        &self,
11329        options: &MarkdownWriteOptions,
11330    ) -> Result<String, IoError>;
11331
11332    /// Write this DataFrame to a Markdown table file.
11333    ///
11334    /// Uses a file-suffixed name to avoid colliding with
11335    /// `DataFrame::to_markdown(include_index, tablefmt)`.
11336    fn to_markdown_file(&self, path: &Path) -> Result<(), IoError>;
11337
11338    /// Write this DataFrame to a Markdown table file with explicit options.
11339    fn to_markdown_file_with_options(
11340        &self,
11341        path: &Path,
11342        options: &MarkdownWriteOptions,
11343    ) -> Result<(), IoError>;
11344
11345    /// Serialize this DataFrame to a LaTeX tabular string.
11346    ///
11347    /// Matches `pd.DataFrame.to_latex()` with no buffer.
11348    fn to_latex_string(&self) -> Result<String, IoError>;
11349
11350    /// Serialize this DataFrame to a LaTeX tabular string with explicit options.
11351    fn to_latex_string_with_options(&self, options: &LatexWriteOptions) -> Result<String, IoError>;
11352
11353    /// Write this DataFrame to a LaTeX tabular file.
11354    ///
11355    /// Uses a file-suffixed name to avoid colliding with
11356    /// `DataFrame::to_latex(include_index)`.
11357    fn to_latex_file(&self, path: &Path) -> Result<(), IoError>;
11358
11359    /// Write this DataFrame to a LaTeX tabular file with explicit options.
11360    fn to_latex_file_with_options(
11361        &self,
11362        path: &Path,
11363        options: &LatexWriteOptions,
11364    ) -> Result<(), IoError>;
11365
11366    /// Serialize this DataFrame to an HTML table string.
11367    ///
11368    /// Matches `pd.DataFrame.to_html()` with no buffer.
11369    fn to_html_string(&self) -> Result<String, IoError>;
11370
11371    /// Serialize this DataFrame to an HTML table string with explicit options.
11372    fn to_html_string_with_options(&self, options: &HtmlWriteOptions) -> Result<String, IoError>;
11373
11374    /// Write this DataFrame to an HTML file.
11375    ///
11376    /// Matches `pd.DataFrame.to_html(path)`.
11377    fn to_html_file(&self, path: &Path) -> Result<(), IoError>;
11378
11379    /// Write this DataFrame to an HTML file with explicit options.
11380    fn to_html_file_with_options(
11381        &self,
11382        path: &Path,
11383        options: &HtmlWriteOptions,
11384    ) -> Result<(), IoError>;
11385
11386    /// Serialize this DataFrame to an XML document string.
11387    ///
11388    /// Matches `pd.DataFrame.to_xml()` with no buffer for the writer-only subset.
11389    fn to_xml_string(&self) -> Result<String, IoError>;
11390
11391    /// Serialize this DataFrame to an XML document string with explicit options.
11392    fn to_xml_string_with_options(&self, options: &XmlWriteOptions) -> Result<String, IoError>;
11393
11394    /// Write this DataFrame to an XML file.
11395    ///
11396    /// Matches `pd.DataFrame.to_xml(path)`.
11397    fn to_xml(&self, path: &Path) -> Result<(), IoError>;
11398
11399    /// Write this DataFrame to an XML file.
11400    ///
11401    /// Matches `pd.DataFrame.to_xml(path)`.
11402    fn to_xml_file(&self, path: &Path) -> Result<(), IoError>;
11403
11404    /// Write this DataFrame to an XML file with explicit options.
11405    fn to_xml_file_with_options(
11406        &self,
11407        path: &Path,
11408        options: &XmlWriteOptions,
11409    ) -> Result<(), IoError>;
11410
11411    /// Write this DataFrame to a JSON file.
11412    ///
11413    /// Matches `pd.DataFrame.to_json(path)`.
11414    fn to_json_file(&self, path: &Path, orient: JsonOrient) -> Result<(), IoError>;
11415
11416    /// Serialize this DataFrame to a JSON string.
11417    ///
11418    /// Matches `pd.DataFrame.to_json()` with no path.
11419    fn to_json_string(&self, orient: JsonOrient) -> Result<String, IoError>;
11420
11421    /// Write this DataFrame to a Pickle file.
11422    ///
11423    /// Matches `pd.DataFrame.to_pickle(path)` for the supported envelope.
11424    fn to_pickle(&self, path: &Path) -> Result<(), IoError>;
11425
11426    /// Write this DataFrame to a Pickle file.
11427    ///
11428    /// Explicit file-suffixed form of [`DataFrameIoExt::to_pickle`].
11429    fn to_pickle_file(&self, path: &Path) -> Result<(), IoError>;
11430
11431    /// Write this DataFrame to a Pickle file with explicit options.
11432    fn to_pickle_with_options(
11433        &self,
11434        path: &Path,
11435        options: &PickleWriteOptions,
11436    ) -> Result<(), IoError>;
11437
11438    /// Serialize this DataFrame to Pickle bytes.
11439    fn to_pickle_bytes(&self) -> Result<Vec<u8>, IoError>;
11440
11441    /// Serialize this DataFrame to Pickle bytes with explicit options.
11442    fn to_pickle_bytes_with_options(
11443        &self,
11444        options: &PickleWriteOptions,
11445    ) -> Result<Vec<u8>, IoError>;
11446
11447    /// Write this DataFrame to a Stata DTA file.
11448    ///
11449    /// Matches `pd.DataFrame.to_stata(path)` for the supported subset.
11450    fn to_stata(&self, path: &Path) -> Result<(), IoError>;
11451
11452    /// Write this DataFrame to a Stata DTA file.
11453    ///
11454    /// Explicit file-suffixed form of [`DataFrameIoExt::to_stata`].
11455    fn to_stata_file(&self, path: &Path) -> Result<(), IoError>;
11456
11457    /// Write this DataFrame to a Stata DTA file with explicit options.
11458    fn to_stata_with_options(
11459        &self,
11460        path: &Path,
11461        options: &StataWriteOptions,
11462    ) -> Result<(), IoError>;
11463
11464    /// Serialize this DataFrame to Stata DTA bytes.
11465    fn to_stata_bytes(&self) -> Result<Vec<u8>, IoError>;
11466
11467    /// Serialize this DataFrame to Stata DTA bytes with explicit options.
11468    fn to_stata_bytes_with_options(&self, options: &StataWriteOptions) -> Result<Vec<u8>, IoError>;
11469
11470    /// Write this DataFrame to an Excel (.xlsx) file.
11471    ///
11472    /// Matches `pd.DataFrame.to_excel(path)`.
11473    fn to_excel(&self, path: &Path) -> Result<(), IoError>;
11474
11475    /// Write this DataFrame to an Excel (.xlsx) file.
11476    ///
11477    /// Explicit file-suffixed form of [`DataFrameIoExt::to_excel`].
11478    fn to_excel_file(&self, path: &Path) -> Result<(), IoError>;
11479
11480    /// Write this DataFrame to an Excel (.xlsx) file with explicit write options.
11481    fn to_excel_with_options(
11482        &self,
11483        path: &Path,
11484        options: &ExcelWriteOptions,
11485    ) -> Result<(), IoError>;
11486
11487    /// Serialize this DataFrame to Excel (.xlsx) bytes in memory.
11488    fn to_excel_bytes(&self) -> Result<Vec<u8>, IoError>;
11489
11490    /// Serialize this DataFrame to Excel (.xlsx) bytes with explicit write options.
11491    fn to_excel_bytes_with_options(&self, options: &ExcelWriteOptions) -> Result<Vec<u8>, IoError>;
11492
11493    /// Write this DataFrame to a JSONL file (one JSON object per line).
11494    ///
11495    /// Matches `pd.DataFrame.to_json(path, orient='records', lines=True)`.
11496    fn to_jsonl_file(&self, path: &Path) -> Result<(), IoError>;
11497
11498    /// Serialize this DataFrame to newline-delimited JSON.
11499    ///
11500    /// Matches `pd.DataFrame.to_json(orient='records', lines=True)`.
11501    fn to_jsonl_string(&self) -> Result<String, IoError>;
11502
11503    /// Write this DataFrame to an Arrow IPC (Feather v2) file.
11504    ///
11505    /// Matches `pd.DataFrame.to_feather(path)`.
11506    fn to_feather(&self, path: &Path) -> Result<(), IoError>;
11507
11508    /// Write this DataFrame to an Arrow IPC (Feather v2) file.
11509    ///
11510    /// Explicit file-suffixed form of [`DataFrameIoExt::to_feather`].
11511    fn to_feather_file(&self, path: &Path) -> Result<(), IoError>;
11512
11513    /// Serialize this DataFrame to Arrow IPC (Feather v2) bytes.
11514    fn to_feather_bytes(&self) -> Result<Vec<u8>, IoError>;
11515
11516    /// Write this DataFrame to a SQL table.
11517    ///
11518    /// Matches `pd.DataFrame.to_sql(name, con)`.
11519    fn to_sql<C: SqlConnection>(
11520        &self,
11521        conn: &C,
11522        table_name: &str,
11523        if_exists: SqlIfExists,
11524    ) -> Result<(), IoError>;
11525
11526    /// Write this DataFrame to a SQL table with pandas-style SQL write options.
11527    fn to_sql_with_options<C: SqlConnection>(
11528        &self,
11529        conn: &C,
11530        table_name: &str,
11531        options: &SqlWriteOptions,
11532    ) -> Result<(), IoError>;
11533
11534    /// Reject-closed clipboard writer, matching `pd.DataFrame.to_clipboard()` shape.
11535    fn to_clipboard(&self) -> Result<(), IoError>;
11536
11537    /// Reject-closed BigQuery writer, matching `pd.DataFrame.to_gbq(destination_table, project_id)`.
11538    fn to_gbq(&self, destination_table: &str, project_id: Option<&str>) -> Result<(), IoError>;
11539}
11540
11541impl DataFrameIoExt for DataFrame {
11542    fn to_parquet(&self, path: &Path) -> Result<(), IoError> {
11543        write_parquet(self, path)
11544    }
11545
11546    fn to_parquet_bytes(&self) -> Result<Vec<u8>, IoError> {
11547        write_parquet_bytes(self)
11548    }
11549
11550    fn to_orc(&self, path: &Path) -> Result<(), IoError> {
11551        write_orc(self, path)
11552    }
11553
11554    fn to_orc_file(&self, path: &Path) -> Result<(), IoError> {
11555        self.to_orc(path)
11556    }
11557
11558    fn to_orc_bytes(&self) -> Result<Vec<u8>, IoError> {
11559        write_orc_bytes(self)
11560    }
11561
11562    fn to_hdf(&self, path: &Path) -> Result<(), IoError> {
11563        write_hdf(self, path)
11564    }
11565
11566    fn to_hdf_file(&self, path: &Path) -> Result<(), IoError> {
11567        self.to_hdf(path)
11568    }
11569
11570    fn to_hdf_key(&self, path: &Path, key: &str) -> Result<(), IoError> {
11571        write_hdf_key(self, path, key)
11572    }
11573
11574    fn to_hdf_with_options(&self, path: &Path, options: &HdfWriteOptions) -> Result<(), IoError> {
11575        write_hdf_with_options(self, path, options)
11576    }
11577
11578    fn to_csv_file(&self, path: &Path) -> Result<(), IoError> {
11579        write_csv(self, path)
11580    }
11581
11582    fn to_csv_string(&self) -> Result<String, IoError> {
11583        write_csv_string(self)
11584    }
11585
11586    fn to_csv_string_with_options(&self, options: &CsvWriteOptions) -> Result<String, IoError> {
11587        write_csv_string_with_options(self, options)
11588    }
11589
11590    fn to_markdown_string(&self) -> Result<String, IoError> {
11591        write_markdown_string(self)
11592    }
11593
11594    fn to_markdown_string_with_options(
11595        &self,
11596        options: &MarkdownWriteOptions,
11597    ) -> Result<String, IoError> {
11598        write_markdown_string_with_options(self, options)
11599    }
11600
11601    fn to_markdown_file(&self, path: &Path) -> Result<(), IoError> {
11602        write_markdown(self, path)
11603    }
11604
11605    fn to_markdown_file_with_options(
11606        &self,
11607        path: &Path,
11608        options: &MarkdownWriteOptions,
11609    ) -> Result<(), IoError> {
11610        write_markdown_with_options(self, path, options)
11611    }
11612
11613    fn to_latex_string(&self) -> Result<String, IoError> {
11614        write_latex_string(self)
11615    }
11616
11617    fn to_latex_string_with_options(&self, options: &LatexWriteOptions) -> Result<String, IoError> {
11618        write_latex_string_with_options(self, options)
11619    }
11620
11621    fn to_latex_file(&self, path: &Path) -> Result<(), IoError> {
11622        write_latex(self, path)
11623    }
11624
11625    fn to_latex_file_with_options(
11626        &self,
11627        path: &Path,
11628        options: &LatexWriteOptions,
11629    ) -> Result<(), IoError> {
11630        write_latex_with_options(self, path, options)
11631    }
11632
11633    fn to_html_string(&self) -> Result<String, IoError> {
11634        write_html_string(self)
11635    }
11636
11637    fn to_html_string_with_options(&self, options: &HtmlWriteOptions) -> Result<String, IoError> {
11638        write_html_string_with_options(self, options)
11639    }
11640
11641    fn to_html_file(&self, path: &Path) -> Result<(), IoError> {
11642        write_html(self, path)
11643    }
11644
11645    fn to_html_file_with_options(
11646        &self,
11647        path: &Path,
11648        options: &HtmlWriteOptions,
11649    ) -> Result<(), IoError> {
11650        write_html_with_options(self, path, options)
11651    }
11652
11653    fn to_xml_string(&self) -> Result<String, IoError> {
11654        write_xml_string(self)
11655    }
11656
11657    fn to_xml_string_with_options(&self, options: &XmlWriteOptions) -> Result<String, IoError> {
11658        write_xml_string_with_options(self, options)
11659    }
11660
11661    fn to_xml(&self, path: &Path) -> Result<(), IoError> {
11662        write_xml(self, path)
11663    }
11664
11665    fn to_xml_file(&self, path: &Path) -> Result<(), IoError> {
11666        self.to_xml(path)
11667    }
11668
11669    fn to_xml_file_with_options(
11670        &self,
11671        path: &Path,
11672        options: &XmlWriteOptions,
11673    ) -> Result<(), IoError> {
11674        write_xml_with_options(self, path, options)
11675    }
11676
11677    fn to_json_file(&self, path: &Path, orient: JsonOrient) -> Result<(), IoError> {
11678        write_json(self, path, orient)
11679    }
11680
11681    fn to_json_string(&self, orient: JsonOrient) -> Result<String, IoError> {
11682        write_json_string(self, orient)
11683    }
11684
11685    fn to_pickle(&self, path: &Path) -> Result<(), IoError> {
11686        write_pickle(self, path)
11687    }
11688
11689    fn to_pickle_file(&self, path: &Path) -> Result<(), IoError> {
11690        self.to_pickle(path)
11691    }
11692
11693    fn to_pickle_with_options(
11694        &self,
11695        path: &Path,
11696        options: &PickleWriteOptions,
11697    ) -> Result<(), IoError> {
11698        write_pickle_with_options(self, path, options)
11699    }
11700
11701    fn to_pickle_bytes(&self) -> Result<Vec<u8>, IoError> {
11702        write_pickle_bytes(self)
11703    }
11704
11705    fn to_pickle_bytes_with_options(
11706        &self,
11707        options: &PickleWriteOptions,
11708    ) -> Result<Vec<u8>, IoError> {
11709        write_pickle_bytes_with_options(self, options)
11710    }
11711
11712    fn to_stata(&self, path: &Path) -> Result<(), IoError> {
11713        write_stata(self, path)
11714    }
11715
11716    fn to_stata_file(&self, path: &Path) -> Result<(), IoError> {
11717        self.to_stata(path)
11718    }
11719
11720    fn to_stata_with_options(
11721        &self,
11722        path: &Path,
11723        options: &StataWriteOptions,
11724    ) -> Result<(), IoError> {
11725        write_stata_with_options(self, path, options)
11726    }
11727
11728    fn to_stata_bytes(&self) -> Result<Vec<u8>, IoError> {
11729        write_stata_bytes(self)
11730    }
11731
11732    fn to_stata_bytes_with_options(&self, options: &StataWriteOptions) -> Result<Vec<u8>, IoError> {
11733        write_stata_bytes_with_options(self, options)
11734    }
11735
11736    fn to_excel(&self, path: &Path) -> Result<(), IoError> {
11737        write_excel(self, path)
11738    }
11739
11740    fn to_excel_file(&self, path: &Path) -> Result<(), IoError> {
11741        self.to_excel(path)
11742    }
11743
11744    fn to_excel_with_options(
11745        &self,
11746        path: &Path,
11747        options: &ExcelWriteOptions,
11748    ) -> Result<(), IoError> {
11749        write_excel_with_options(self, path, options)
11750    }
11751
11752    fn to_excel_bytes(&self) -> Result<Vec<u8>, IoError> {
11753        write_excel_bytes(self)
11754    }
11755
11756    fn to_excel_bytes_with_options(&self, options: &ExcelWriteOptions) -> Result<Vec<u8>, IoError> {
11757        write_excel_bytes_with_options(self, options)
11758    }
11759
11760    fn to_jsonl_file(&self, path: &Path) -> Result<(), IoError> {
11761        write_jsonl(self, path)
11762    }
11763
11764    fn to_jsonl_string(&self) -> Result<String, IoError> {
11765        write_jsonl_string(self)
11766    }
11767
11768    fn to_feather(&self, path: &Path) -> Result<(), IoError> {
11769        write_feather(self, path)
11770    }
11771
11772    fn to_feather_file(&self, path: &Path) -> Result<(), IoError> {
11773        self.to_feather(path)
11774    }
11775
11776    fn to_feather_bytes(&self) -> Result<Vec<u8>, IoError> {
11777        write_feather_bytes(self)
11778    }
11779
11780    fn to_sql<C: SqlConnection>(
11781        &self,
11782        conn: &C,
11783        table_name: &str,
11784        if_exists: SqlIfExists,
11785    ) -> Result<(), IoError> {
11786        write_sql(self, conn, table_name, if_exists)
11787    }
11788
11789    fn to_sql_with_options<C: SqlConnection>(
11790        &self,
11791        conn: &C,
11792        table_name: &str,
11793        options: &SqlWriteOptions,
11794    ) -> Result<(), IoError> {
11795        write_sql_with_options(self, conn, table_name, options)
11796    }
11797
11798    fn to_clipboard(&self) -> Result<(), IoError> {
11799        let _ = self;
11800        Err(deferred_writer_error(
11801            "to_clipboard",
11802            "OS clipboard access requires GUI bindings outside FrankenPandas's headless charter",
11803        ))
11804    }
11805
11806    fn to_gbq(&self, _destination_table: &str, _project_id: Option<&str>) -> Result<(), IoError> {
11807        let _ = self;
11808        Err(deferred_writer_error(
11809            "to_gbq",
11810            "Google BigQuery integration is outside FrankenPandas's local file-format scope",
11811        ))
11812    }
11813}
11814
11815// ── Extension trait for Series IO convenience methods ─────────────────
11816
11817/// Extension trait that adds IO convenience methods to `Series`.
11818///
11819/// Import this trait to call `series.to_pickle(path)`,
11820/// `series.to_pickle_bytes()`, `series.to_hdf(path)`,
11821/// `series.to_csv_string()`, `series.to_markdown_string()`,
11822/// `series.to_latex_string()`, `series.to_json_string("records")`,
/// `series.to_excel(path)`, `series.to_sql(conn, table, if_exists)`, or
/// `series.to_clipboard()` directly on Series values.
11826pub trait SeriesIoExt {
11827    /// Write this Series to a Pickle file.
11828    ///
11829    /// Matches `pd.Series.to_pickle(path)` for the supported
11830    /// FrankenPandas pickle envelope.
11831    fn to_pickle(&self, path: &Path) -> Result<(), IoError>;
11832
11833    /// Write this Series to a Pickle file.
11834    ///
11835    /// Explicit file-suffixed form of [`SeriesIoExt::to_pickle`].
11836    fn to_pickle_file(&self, path: &Path) -> Result<(), IoError>;
11837
11838    /// Write this Series to a Pickle file with explicit options.
11839    fn to_pickle_with_options(
11840        &self,
11841        path: &Path,
11842        options: &PickleWriteOptions,
11843    ) -> Result<(), IoError>;
11844
11845    /// Serialize this Series to Pickle bytes.
11846    fn to_pickle_bytes(&self) -> Result<Vec<u8>, IoError>;
11847
11848    /// Serialize this Series to Pickle bytes with explicit options.
11849    fn to_pickle_bytes_with_options(
11850        &self,
11851        options: &PickleWriteOptions,
11852    ) -> Result<Vec<u8>, IoError>;
11853
11854    /// Write this Series to a CSV file.
11855    ///
11856    /// Matches `pd.Series.to_csv(path)` for the supported CSV writer surface,
11857    /// including pandas' default index materialization.
11858    fn to_csv_file(&self, path: &Path) -> Result<(), IoError>;
11859
11860    /// Write this Series to a CSV file with explicit write options.
11861    fn to_csv_file_with_options(
11862        &self,
11863        path: &Path,
11864        options: &CsvWriteOptions,
11865    ) -> Result<(), IoError>;
11866
11867    /// Serialize this Series to a CSV string.
11868    ///
11869    /// Matches `pd.Series.to_csv()` with no path for the supported writer
11870    /// surface.
11871    fn to_csv_string(&self) -> Result<String, IoError>;
11872
11873    /// Serialize this Series to a CSV string with explicit write options.
11874    fn to_csv_string_with_options(&self, options: &CsvWriteOptions) -> Result<String, IoError>;
11875
11876    /// Serialize this Series to a Markdown table string.
11877    ///
11878    /// Matches `pd.Series.to_markdown()` with no buffer for the supported
11879    /// table formatter surface.
11880    fn to_markdown_string(&self) -> Result<String, IoError>;
11881
11882    /// Serialize this Series to a Markdown table string with explicit options.
11883    fn to_markdown_string_with_options(
11884        &self,
11885        options: &MarkdownWriteOptions,
11886    ) -> Result<String, IoError>;
11887
11888    /// Write this Series to a Markdown table file.
11889    ///
11890    /// Uses a file-suffixed name to avoid colliding with
11891    /// `Series::to_markdown(include_index, tablefmt)`.
11892    fn to_markdown_file(&self, path: &Path) -> Result<(), IoError>;
11893
11894    /// Write this Series to a Markdown table file with explicit options.
11895    fn to_markdown_file_with_options(
11896        &self,
11897        path: &Path,
11898        options: &MarkdownWriteOptions,
11899    ) -> Result<(), IoError>;
11900
11901    /// Serialize this Series to a LaTeX tabular string.
11902    ///
11903    /// Matches `pd.Series.to_latex()` with no buffer for the supported table
11904    /// formatter surface.
11905    fn to_latex_string(&self) -> Result<String, IoError>;
11906
11907    /// Serialize this Series to a LaTeX tabular string with explicit options.
11908    fn to_latex_string_with_options(&self, options: &LatexWriteOptions) -> Result<String, IoError>;
11909
11910    /// Write this Series to a LaTeX tabular file.
11911    ///
11912    /// Uses a file-suffixed name to avoid colliding with
11913    /// `Series::to_latex(include_index)`.
11914    fn to_latex_file(&self, path: &Path) -> Result<(), IoError>;
11915
11916    /// Write this Series to a LaTeX tabular file with explicit options.
11917    fn to_latex_file_with_options(
11918        &self,
11919        path: &Path,
11920        options: &LatexWriteOptions,
11921    ) -> Result<(), IoError>;
11922
11923    /// Write this Series to a JSON file.
11924    ///
11925    /// Matches `pd.Series.to_json(path, orient=...)` for the supported Series
11926    /// JSON orientations.
11927    fn to_json_file(&self, path: &Path, orient: &str) -> Result<(), IoError>;
11928
11929    /// Serialize this Series to a JSON string.
11930    ///
11931    /// Matches `pd.Series.to_json(orient=...)` for the supported Series JSON
11932    /// orientations.
11933    fn to_json_string(&self, orient: &str) -> Result<String, IoError>;
11934
11935    /// Write this Series to an HDF5 file at the default key.
11936    ///
11937    /// Matches `pd.Series.to_hdf(path)` for the supported HDF5 snapshot
11938    /// surface.
11939    fn to_hdf(&self, path: &Path) -> Result<(), IoError>;
11940
11941    /// Write this Series to an HDF5 file at the default key.
11942    ///
11943    /// Explicit file-suffixed form of [`SeriesIoExt::to_hdf`].
11944    fn to_hdf_file(&self, path: &Path) -> Result<(), IoError>;
11945
11946    /// Write this Series to an HDF5 file at an explicit key.
11947    fn to_hdf_key(&self, path: &Path, key: &str) -> Result<(), IoError>;
11948
11949    /// Write this Series to an HDF5 file with explicit options.
11950    fn to_hdf_with_options(&self, path: &Path, options: &HdfWriteOptions) -> Result<(), IoError>;
11951
11952    /// Write this Series to an Excel file.
11953    ///
11954    /// Matches `pd.Series.to_excel(path)` for the supported xlsx writer
11955    /// surface.
11956    fn to_excel(&self, path: &Path) -> Result<(), IoError>;
11957
11958    /// Write this Series to an Excel file.
11959    ///
11960    /// Explicit file-suffixed form of [`SeriesIoExt::to_excel`].
11961    fn to_excel_file(&self, path: &Path) -> Result<(), IoError>;
11962
11963    /// Write this Series to an Excel file with explicit options.
11964    fn to_excel_with_options(
11965        &self,
11966        path: &Path,
11967        options: &ExcelWriteOptions,
11968    ) -> Result<(), IoError>;
11969
11970    /// Serialize this Series to xlsx bytes.
11971    fn to_excel_bytes(&self) -> Result<Vec<u8>, IoError>;
11972
11973    /// Serialize this Series to xlsx bytes with explicit options.
11974    fn to_excel_bytes_with_options(&self, options: &ExcelWriteOptions) -> Result<Vec<u8>, IoError>;
11975
11976    /// Write this Series to a SQL table.
11977    ///
11978    /// Matches `pd.Series.to_sql(name, con)` for the supported SQL writer
11979    /// surface, including pandas' default index materialization.
11980    fn to_sql<C: SqlConnection>(
11981        &self,
11982        conn: &C,
11983        table_name: &str,
11984        if_exists: SqlIfExists,
11985    ) -> Result<(), IoError>;
11986
11987    /// Write this Series to a SQL table with pandas-style SQL write options.
11988    fn to_sql_with_options<C: SqlConnection>(
11989        &self,
11990        conn: &C,
11991        table_name: &str,
11992        options: &SqlWriteOptions,
11993    ) -> Result<(), IoError>;
11994
11995    /// Reject-closed clipboard writer, matching `pd.Series.to_clipboard()` shape.
11996    fn to_clipboard(&self) -> Result<(), IoError>;
11997}
11998
11999impl SeriesIoExt for Series {
12000    fn to_pickle(&self, path: &Path) -> Result<(), IoError> {
12001        write_pickle(&self.to_frame(None)?, path)
12002    }
12003
12004    fn to_pickle_file(&self, path: &Path) -> Result<(), IoError> {
12005        self.to_pickle(path)
12006    }
12007
12008    fn to_pickle_with_options(
12009        &self,
12010        path: &Path,
12011        options: &PickleWriteOptions,
12012    ) -> Result<(), IoError> {
12013        write_pickle_with_options(&self.to_frame(None)?, path, options)
12014    }
12015
12016    fn to_pickle_bytes(&self) -> Result<Vec<u8>, IoError> {
12017        write_pickle_bytes(&self.to_frame(None)?)
12018    }
12019
12020    fn to_pickle_bytes_with_options(
12021        &self,
12022        options: &PickleWriteOptions,
12023    ) -> Result<Vec<u8>, IoError> {
12024        write_pickle_bytes_with_options(&self.to_frame(None)?, options)
12025    }
12026
12027    fn to_csv_file(&self, path: &Path) -> Result<(), IoError> {
12028        self.to_csv_file_with_options(
12029            path,
12030            &CsvWriteOptions {
12031                include_index: true,
12032                ..CsvWriteOptions::default()
12033            },
12034        )
12035    }
12036
12037    fn to_csv_file_with_options(
12038        &self,
12039        path: &Path,
12040        options: &CsvWriteOptions,
12041    ) -> Result<(), IoError> {
12042        std::fs::write(path, self.to_csv_string_with_options(options)?)?;
12043        Ok(())
12044    }
12045
12046    fn to_csv_string(&self) -> Result<String, IoError> {
12047        self.to_csv_string_with_options(&CsvWriteOptions {
12048            include_index: true,
12049            ..CsvWriteOptions::default()
12050        })
12051    }
12052
12053    fn to_csv_string_with_options(&self, options: &CsvWriteOptions) -> Result<String, IoError> {
12054        write_csv_string_with_options(&self.to_frame(None)?, options)
12055    }
12056
12057    fn to_markdown_string(&self) -> Result<String, IoError> {
12058        self.to_markdown_string_with_options(&MarkdownWriteOptions::default())
12059    }
12060
12061    fn to_markdown_string_with_options(
12062        &self,
12063        options: &MarkdownWriteOptions,
12064    ) -> Result<String, IoError> {
12065        write_markdown_string_with_options(&self.to_frame(None)?, options)
12066    }
12067
12068    fn to_markdown_file(&self, path: &Path) -> Result<(), IoError> {
12069        self.to_markdown_file_with_options(path, &MarkdownWriteOptions::default())
12070    }
12071
12072    fn to_markdown_file_with_options(
12073        &self,
12074        path: &Path,
12075        options: &MarkdownWriteOptions,
12076    ) -> Result<(), IoError> {
12077        write_markdown_with_options(&self.to_frame(None)?, path, options)
12078    }
12079
12080    fn to_latex_string(&self) -> Result<String, IoError> {
12081        self.to_latex_string_with_options(&LatexWriteOptions::default())
12082    }
12083
12084    fn to_latex_string_with_options(&self, options: &LatexWriteOptions) -> Result<String, IoError> {
12085        write_latex_string_with_options(&self.to_frame(None)?, options)
12086    }
12087
12088    fn to_latex_file(&self, path: &Path) -> Result<(), IoError> {
12089        self.to_latex_file_with_options(path, &LatexWriteOptions::default())
12090    }
12091
12092    fn to_latex_file_with_options(
12093        &self,
12094        path: &Path,
12095        options: &LatexWriteOptions,
12096    ) -> Result<(), IoError> {
12097        write_latex_with_options(&self.to_frame(None)?, path, options)
12098    }
12099
12100    fn to_json_file(&self, path: &Path, orient: &str) -> Result<(), IoError> {
12101        std::fs::write(path, self.to_json_string(orient)?)?;
12102        Ok(())
12103    }
12104
12105    fn to_json_string(&self, orient: &str) -> Result<String, IoError> {
12106        Ok(Series::to_json(self, orient)?)
12107    }
12108
12109    fn to_hdf(&self, path: &Path) -> Result<(), IoError> {
12110        write_hdf(&self.to_frame(None)?, path)
12111    }
12112
12113    fn to_hdf_file(&self, path: &Path) -> Result<(), IoError> {
12114        self.to_hdf(path)
12115    }
12116
12117    fn to_hdf_key(&self, path: &Path, key: &str) -> Result<(), IoError> {
12118        write_hdf_key(&self.to_frame(None)?, path, key)
12119    }
12120
12121    fn to_hdf_with_options(&self, path: &Path, options: &HdfWriteOptions) -> Result<(), IoError> {
12122        write_hdf_with_options(&self.to_frame(None)?, path, options)
12123    }
12124
12125    fn to_excel(&self, path: &Path) -> Result<(), IoError> {
12126        write_excel(&self.to_frame(None)?, path)
12127    }
12128
12129    fn to_excel_file(&self, path: &Path) -> Result<(), IoError> {
12130        self.to_excel(path)
12131    }
12132
12133    fn to_excel_with_options(
12134        &self,
12135        path: &Path,
12136        options: &ExcelWriteOptions,
12137    ) -> Result<(), IoError> {
12138        write_excel_with_options(&self.to_frame(None)?, path, options)
12139    }
12140
12141    fn to_excel_bytes(&self) -> Result<Vec<u8>, IoError> {
12142        write_excel_bytes(&self.to_frame(None)?)
12143    }
12144
12145    fn to_excel_bytes_with_options(&self, options: &ExcelWriteOptions) -> Result<Vec<u8>, IoError> {
12146        write_excel_bytes_with_options(&self.to_frame(None)?, options)
12147    }
12148
12149    fn to_sql<C: SqlConnection>(
12150        &self,
12151        conn: &C,
12152        table_name: &str,
12153        if_exists: SqlIfExists,
12154    ) -> Result<(), IoError> {
12155        write_sql_with_options(
12156            &self.to_frame(None)?,
12157            conn,
12158            table_name,
12159            &SqlWriteOptions {
12160                if_exists,
12161                index: true,
12162                index_label: None,
12163                schema: None,
12164                dtype: None,
12165                method: SqlInsertMethod::Single,
12166                chunksize: None,
12167            },
12168        )
12169    }
12170
12171    fn to_sql_with_options<C: SqlConnection>(
12172        &self,
12173        conn: &C,
12174        table_name: &str,
12175        options: &SqlWriteOptions,
12176    ) -> Result<(), IoError> {
12177        write_sql_with_options(&self.to_frame(None)?, conn, table_name, options)
12178    }
12179
12180    fn to_clipboard(&self) -> Result<(), IoError> {
12181        let _ = self;
12182        Err(deferred_writer_error(
12183            "to_clipboard",
12184            "OS clipboard access requires GUI bindings outside FrankenPandas's headless charter",
12185        ))
12186    }
12187}
12188
12189#[cfg(test)]
12190mod tests {
12191    use std::collections::BTreeMap;
12192
12193    use arrow::{
12194        array::{Array, Int64Array},
12195        datatypes::DataType as ArrowDataType,
12196    };
12197    use fp_columnar::Column;
12198    use fp_frame::{DataFrame, Series};
12199    use fp_index::{Index, IndexLabel};
12200    use fp_types::{DType, NullKind, Scalar};
12201
12202    use super::{
12203        CsvWriteOptions, ExcelReadOptions, ExcelWriteOptions, HtmlReadOptions, HtmlWriteOptions,
12204        IoError, JsonOrient, LatexWriteOptions, MarkdownWriteOptions, PickleProtocol,
12205        PickleWriteOptions, StataWriteOptions, XmlReadOptions, XmlWriteOptions,
12206        format_pandas_float, read_csv_str, read_csv_with_index_cols, read_excel_bytes,
12207        read_feather_bytes, read_html, read_html_str, read_html_str_with_options, read_json_str,
12208        read_orc, read_orc_bytes, read_parquet_bytes, read_pickle, read_pickle_bytes, read_stata,
12209        read_stata_bytes, read_xml, read_xml_str, read_xml_str_with_options, write_csv_string,
12210        write_csv_string_with_options, write_excel_bytes, write_html, write_html_string,
12211        write_html_string_with_options, write_json_string, write_jsonl_string, write_latex,
12212        write_latex_string, write_latex_string_with_options, write_latex_with_options,
12213        write_markdown, write_markdown_string, write_markdown_string_with_options,
12214        write_markdown_with_options, write_orc, write_orc_bytes, write_pickle, write_pickle_bytes,
12215        write_stata, write_stata_bytes, write_stata_bytes_with_options, write_xml,
12216        write_xml_string, write_xml_string_with_options,
12217    };
12218    #[cfg(feature = "hdf5")]
12219    use super::{
12220        HdfReadOptions, HdfWriteOptions, read_hdf, read_hdf_key, read_hdf_with_options, write_hdf,
12221        write_hdf_key, write_hdf_with_options,
12222    };
12223
12224    #[test]
12225    fn csv_round_trip_preserves_null_and_numeric_shape() {
12226        let input = "id,value\n1,10\n2,\n3,3.5\n";
12227        let frame = read_csv_str(input).expect("read");
12228        let value_col = frame.column("value").expect("value");
12229
12230        assert_eq!(value_col.values()[1], Scalar::Null(NullKind::NaN));
12231
12232        let out = write_csv_string(&frame).expect("write");
12233        assert!(out.contains("id,value"));
12234        assert!(out.contains("3,3.5"));
12235    }
12236
12237    #[test]
12238    fn csv_numeric_fast_path_preserves_default_dtypes_and_values() {
12239        let input = "i,f\n1,0.5\n2,3\n";
12240        let frame = read_csv_str(input).expect("read");
12241
12242        let int_col = frame.column("i").expect("i");
12243        assert_eq!(int_col.dtype(), DType::Int64);
12244        assert_eq!(int_col.values(), &[Scalar::Int64(1), Scalar::Int64(2)]);
12245
12246        let float_col = frame.column("f").expect("f");
12247        assert_eq!(float_col.dtype(), DType::Float64);
12248        assert_eq!(
12249            float_col.values(),
12250            &[Scalar::Float64(0.5), Scalar::Float64(3.0)]
12251        );
12252    }
12253
12254    #[test]
12255    fn csv_numeric_probe_falls_back_to_preserve_object_raw_text() {
12256        let input = "x\n 1 \nabc\n";
12257        let frame = read_csv_str(input).expect("read");
12258        let column = frame.column("x").expect("x");
12259
12260        assert_eq!(column.dtype(), DType::Utf8);
12261        assert_eq!(
12262            column.values(),
12263            &[
12264                Scalar::Utf8(" 1 ".to_owned()),
12265                Scalar::Utf8("abc".to_owned())
12266            ]
12267        );
12268    }
12269
12270    #[test]
12271    fn csv_parses_boolean_true_false_case_insensitive() {
12272        let input = "flag\nTrue\nFALSE\ntrue\nfalse\n";
12273        let frame = read_csv_str(input).expect("read");
12274        let flag_col = frame.column("flag").expect("flag");
12275        assert_eq!(flag_col.values()[0], Scalar::Bool(true));
12276        assert_eq!(flag_col.values()[1], Scalar::Bool(false));
12277        assert_eq!(flag_col.values()[2], Scalar::Bool(true));
12278        assert_eq!(flag_col.values()[3], Scalar::Bool(false));
12279    }
12280
12281    #[test]
12282    fn csv_duplicate_headers_error() {
12283        let input = "a,a\n1,2\n";
12284        let err = read_csv_str(input).expect_err("duplicate header");
12285        assert!(matches!(err, IoError::DuplicateColumnName(name) if name == "a"));
12286    }
12287
12288    #[test]
12289    fn csv_ragged_row_returns_error_4hpid() {
12290        // Per br-frankenpandas-4hpid: confirm pandas-faithful rejection on
12291        // ragged rows. The underlying csv crate raises UnequalLengths
12292        // (surfaced as IoError::Csv) — record.get(idx).unwrap_or_default()
12293        // inside the loop is dead code because the `row?` upstream errors
12294        // first. This locks in the rejection contract.
12295        let short_row = "a,b,c\n1,2,3\n4,5\n7,8,9\n";
12296        let err = read_csv_str(short_row).expect_err("short row must reject");
12297        assert!(
12298            matches!(err, IoError::Csv(_)),
12299            "expected IoError::Csv (UnequalLengths from csv crate), got {err:?}"
12300        );
12301    }
12302
12303    fn make_table_format_dataframe() -> DataFrame {
12304        let mut columns = BTreeMap::new();
12305        columns.insert(
12306            "name".to_owned(),
12307            Column::from_values(vec![
12308                Scalar::Utf8("A|B".to_owned()),
12309                Scalar::Utf8("under_score".to_owned()),
12310            ])
12311            .expect("name column"),
12312        );
12313        columns.insert(
12314            "value".to_owned(),
12315            Column::from_values(vec![Scalar::Float64(f64::NAN), Scalar::Int64(2)])
12316                .expect("value column"),
12317        );
12318
12319        let index = Index::new(vec![
12320            IndexLabel::Utf8("r&1".to_owned()),
12321            IndexLabel::Utf8("r_2".to_owned()),
12322        ])
12323        .set_name("row");
12324        DataFrame::new_with_column_order(
12325            index,
12326            columns,
12327            vec!["name".to_owned(), "value".to_owned()],
12328        )
12329        .expect("table format frame")
12330    }
12331
12332    #[test]
12333    fn markdown_table_writer_includes_index_missing_values_and_escaping() {
12334        let frame = make_table_format_dataframe();
12335
12336        let out = write_markdown_string(&frame).expect("markdown");
12337
12338        assert_eq!(
12339            out,
12340            concat!(
12341                "| row | name | value |\n",
12342                "| --- | --- | --- |\n",
12343                "| r&1 | A\\|B | NaN |\n",
12344                "| r_2 | under_score | 2 |\n",
12345            )
12346        );
12347    }
12348
12349    #[test]
12350    fn markdown_table_writer_options_can_omit_index_and_override_na() {
12351        let frame = make_table_format_dataframe();
12352
12353        let out = write_markdown_string_with_options(
12354            &frame,
12355            &MarkdownWriteOptions {
12356                include_index: false,
12357                na_rep: "<missing>".to_owned(),
12358                index_label: Some("ignored".to_owned()),
12359            },
12360        )
12361        .expect("markdown");
12362
12363        assert_eq!(
12364            out,
12365            concat!(
12366                "| name | value |\n",
12367                "| --- | --- |\n",
12368                "| A\\|B | <missing> |\n",
12369                "| under_score | 2 |\n",
12370            )
12371        );
12372    }
12373
12374    #[test]
12375    fn latex_table_writer_emits_booktabs_and_supports_escaping() {
12376        let frame = make_table_format_dataframe();
12377
12378        let out = write_latex_string_with_options(
12379            &frame,
12380            &LatexWriteOptions {
12381                include_index: true,
12382                na_rep: "NA".to_owned(),
12383                index_label: Some("row_id".to_owned()),
12384                escape: true,
12385            },
12386        )
12387        .expect("latex");
12388
12389        assert_eq!(
12390            out,
12391            concat!(
12392                "\\begin{tabular}{lll}\n",
12393                "\\toprule\n",
12394                " & name & value \\\\\n",
12395                "row\\_id &  &  \\\\\n",
12396                "\\midrule\n",
12397                "r\\&1 & A|B & NA \\\\\n",
12398                "r\\_2 & under\\_score & 2.000000 \\\\\n",
12399                "\\bottomrule\n",
12400                "\\end{tabular}\n",
12401            )
12402        );
12403    }
12404
12405    #[test]
12406    fn to_latex_floats_use_six_decimal_places_like_pandas() {
12407        // pandas to_latex default float_format is %.6f. Verified vs pandas 2.2.3:
12408        // 1.0->"1.000000", -2.5->"-2.500000", 0.1234567->"0.123457" (rounded),
12409        // inf->"inf". Other writers (csv/html) keep str(float).
12410        let values = vec![
12411            Scalar::Float64(1.0),
12412            Scalar::Float64(-2.5),
12413            Scalar::Float64(0.1234567),
12414            Scalar::Float64(f64::INFINITY),
12415        ];
12416        let col = Column::new(DType::Float64, values).expect("col");
12417        let mut cols = BTreeMap::new();
12418        cols.insert("a".to_string(), col);
12419        let index = Index::from_i64((0..4).collect());
12420        let frame =
12421            DataFrame::new_with_column_order(index, cols, vec!["a".to_string()]).expect("frame");
12422
12423        let out = write_latex_string_with_options(
12424            &frame,
12425            &LatexWriteOptions {
12426                include_index: false,
12427                na_rep: "NaN".to_owned(),
12428                index_label: None,
12429                escape: true,
12430            },
12431        )
12432        .expect("latex");
12433        assert!(out.contains("1.000000 \\\\"), "got: {out}");
12434        assert!(out.contains("-2.500000 \\\\"), "got: {out}");
12435        assert!(out.contains("0.123457 \\\\"), "got: {out}");
12436        assert!(out.contains("inf \\\\"), "got: {out}");
12437    }
12438
12439    #[test]
12440    fn markdown_latex_file_writers_match_string_outputs() {
12441        let frame = make_table_format_dataframe();
12442        let markdown_path = std::env::temp_dir().join(format!(
12443            "fp_io_markdown_writer_{}_{}.md",
12444            std::process::id(),
12445            line!()
12446        ));
12447        let latex_path = std::env::temp_dir().join(format!(
12448            "fp_io_latex_writer_{}_{}.tex",
12449            std::process::id(),
12450            line!()
12451        ));
12452
12453        write_markdown(&frame, &markdown_path).expect("write markdown path");
12454        write_latex(&frame, &latex_path).expect("write latex path");
12455
12456        assert_eq!(
12457            std::fs::read_to_string(&markdown_path).expect("read markdown path"),
12458            write_markdown_string(&frame).expect("markdown string")
12459        );
12460        assert_eq!(
12461            std::fs::read_to_string(&latex_path).expect("read latex path"),
12462            write_latex_string(&frame).expect("latex string")
12463        );
12464    }
12465
12466    #[test]
12467    fn markdown_latex_trait_aliases_forward_options() {
12468        use super::DataFrameIoExt;
12469
12470        let frame = make_table_format_dataframe();
12471        let markdown_options = MarkdownWriteOptions {
12472            include_index: false,
12473            na_rep: "NA".to_owned(),
12474            index_label: Some("ignored".to_owned()),
12475        };
12476        let latex_options = LatexWriteOptions {
12477            include_index: false,
12478            na_rep: "NA".to_owned(),
12479            index_label: Some("ignored".to_owned()),
12480            escape: true,
12481        };
12482        let markdown_path = std::env::temp_dir().join(format!(
12483            "fp_io_markdown_trait_{}_{}.md",
12484            std::process::id(),
12485            line!()
12486        ));
12487        let latex_path = std::env::temp_dir().join(format!(
12488            "fp_io_latex_trait_{}_{}.tex",
12489            std::process::id(),
12490            line!()
12491        ));
12492
12493        frame
12494            .to_markdown_file_with_options(&markdown_path, &markdown_options)
12495            .expect("trait markdown file");
12496        frame
12497            .to_latex_file_with_options(&latex_path, &latex_options)
12498            .expect("trait latex file");
12499
12500        assert_eq!(
12501            frame
12502                .to_markdown_string_with_options(&markdown_options)
12503                .expect("trait markdown options"),
12504            std::fs::read_to_string(&markdown_path).expect("read markdown trait path")
12505        );
12506        assert_eq!(
12507            frame
12508                .to_latex_string_with_options(&latex_options)
12509                .expect("trait latex options"),
12510            std::fs::read_to_string(&latex_path).expect("read latex trait path")
12511        );
12512
12513        let default_markdown_path = std::env::temp_dir().join(format!(
12514            "fp_io_markdown_trait_default_{}_{}.md",
12515            std::process::id(),
12516            line!()
12517        ));
12518        let default_latex_path = std::env::temp_dir().join(format!(
12519            "fp_io_latex_trait_default_{}_{}.tex",
12520            std::process::id(),
12521            line!()
12522        ));
12523        frame
12524            .to_markdown_file(&default_markdown_path)
12525            .expect("trait markdown default file");
12526        frame
12527            .to_latex_file(&default_latex_path)
12528            .expect("trait latex default file");
12529
12530        assert_eq!(
12531            std::fs::read_to_string(&default_markdown_path).expect("read markdown default"),
12532            write_markdown_string(&frame).expect("markdown default")
12533        );
12534        assert_eq!(
12535            std::fs::read_to_string(&default_latex_path).expect("read latex default"),
12536            write_latex_string(&frame).expect("latex default")
12537        );
12538
12539        let free_markdown_path = std::env::temp_dir().join(format!(
12540            "fp_io_markdown_free_options_{}_{}.md",
12541            std::process::id(),
12542            line!()
12543        ));
12544        let free_latex_path = std::env::temp_dir().join(format!(
12545            "fp_io_latex_free_options_{}_{}.tex",
12546            std::process::id(),
12547            line!()
12548        ));
12549        write_markdown_with_options(&frame, &free_markdown_path, &markdown_options)
12550            .expect("free markdown options file");
12551        write_latex_with_options(&frame, &free_latex_path, &latex_options)
12552            .expect("free latex options file");
12553        assert_eq!(
12554            std::fs::read_to_string(&free_markdown_path).expect("read free markdown options"),
12555            write_markdown_string_with_options(&frame, &markdown_options)
12556                .expect("free markdown options string")
12557        );
12558        assert_eq!(
12559            std::fs::read_to_string(&free_latex_path).expect("read free latex options"),
12560            write_latex_string_with_options(&frame, &latex_options)
12561                .expect("free latex options string")
12562        );
12563    }
12564
12565    #[test]
12566    fn html_table_writer_defaults_to_index_and_reuses_dataframe_formatter() {
12567        let frame = make_table_format_dataframe();
12568
12569        let out = write_html_string(&frame).expect("html");
12570
12571        assert_eq!(out, frame.to_html(true));
12572        assert!(out.contains("<th>r&amp;1</th>"));
12573        assert!(out.contains("<td>A|B</td>"));
12574        assert!(out.contains("<td>NaN</td>"));
12575    }
12576
12577    #[test]
12578    fn html_table_writer_options_can_omit_index() {
12579        let frame = make_table_format_dataframe();
12580
12581        let out = write_html_string_with_options(
12582            &frame,
12583            &HtmlWriteOptions {
12584                include_index: false,
12585                ..HtmlWriteOptions::default()
12586            },
12587        )
12588        .expect("html");
12589
12590        assert_eq!(out, frame.to_html(false));
12591        assert!(!out.contains("<th>r&amp;1</th>"));
12592        assert!(out.contains("<td>A|B</td>"));
12593    }
12594
12595    #[test]
12596    fn html_table_writer_supports_pandas_pure_string_options_u892h() {
12597        let mut columns = BTreeMap::new();
12598        columns.insert(
12599            "url&col".to_owned(),
12600            Column::from_values(vec![
12601                Scalar::Utf8("https://example.test/a?x=1&y=2".to_owned()),
12602                Scalar::Utf8("<b>".to_owned()),
12603            ])
12604            .expect("url column"),
12605        );
12606        columns.insert(
12607            "value".to_owned(),
12608            Column::from_values(vec![Scalar::Null(NullKind::NaN), Scalar::Float64(2.0)])
12609                .expect("value column"),
12610        );
12611        let frame = DataFrame::new_with_column_order(
12612            Index::new(vec![
12613                IndexLabel::Utf8("r&1".to_owned()),
12614                IndexLabel::Utf8("r2".to_owned()),
12615            ]),
12616            columns,
12617            vec!["url&col".to_owned(), "value".to_owned()],
12618        )
12619        .expect("html options frame");
12620
12621        let out = write_html_string_with_options(
12622            &frame,
12623            &HtmlWriteOptions {
12624                include_index: true,
12625                na_rep: "<NA>".to_owned(),
12626                classes: vec!["table table-sm".to_owned(), "fp".to_owned()],
12627                table_id: Some("report&1".to_owned()),
12628                border: Some(0),
12629                justify: Some("left".to_owned()),
12630                escape: true,
12631                render_links: true,
12632            },
12633        )
12634        .expect("html options");
12635
12636        assert!(
12637            out.starts_with("<table class=\"dataframe table table-sm fp\" id=\"report&amp;1\">")
12638        );
12639        assert!(!out.contains("border=\""));
12640        assert!(out.contains("<tr style=\"text-align: left;\">"));
12641        assert!(out.contains("<th>url&amp;col</th>"));
12642        assert!(out.contains("<th>r&amp;1</th>"));
12643        assert!(out.contains("<td>&lt;NA&gt;</td>"));
12644        assert!(out.contains(
12645            "<a href=\"https://example.test/a?x=1&amp;y=2\" target=\"_blank\">https://example.test/a?x=1&amp;y=2</a>"
12646        ));
12647        assert!(out.contains("<td>&lt;b&gt;</td>"));
12648    }
12649
12650    #[test]
12651    fn html_table_writer_can_disable_escaping_u892h() {
12652        let mut columns = BTreeMap::new();
12653        columns.insert(
12654            "raw<th>".to_owned(),
12655            Column::from_values(vec![
12656                Scalar::Utf8("<b>".to_owned()),
12657                Scalar::Null(NullKind::NaN),
12658            ])
12659            .expect("raw column"),
12660        );
12661        let frame = DataFrame::new_with_column_order(
12662            Index::new(vec![
12663                IndexLabel::Utf8("r&1".to_owned()),
12664                IndexLabel::Int64(2),
12665            ]),
12666            columns,
12667            vec!["raw<th>".to_owned()],
12668        )
12669        .expect("raw html frame");
12670
12671        let out = write_html_string_with_options(
12672            &frame,
12673            &HtmlWriteOptions {
12674                na_rep: "<NA>".to_owned(),
12675                escape: false,
12676                ..HtmlWriteOptions::default()
12677            },
12678        )
12679        .expect("raw html options");
12680
12681        assert!(out.contains("<th>raw<th></th>"));
12682        assert!(out.contains("<th>r&1</th>"));
12683        assert!(out.contains("<td><b></td>"));
12684        assert!(out.contains("<td><NA></td>"));
12685    }
12686
/// Confirms that the HTML file writers emit exactly the text returned by the
/// matching string renderers.
///
/// Three pairings are checked: the free `write_html` file writer against
/// `write_html_string`, the `to_html_string` extension method against
/// `write_html_string`, and the extension file writer with
/// `include_index: false` against `write_html_string_with_options`.
#[test]
fn html_table_writer_file_output_matches_string_output() {
    use super::DataFrameIoExt;

    let table = make_table_format_dataframe();

    // Default options: file written by the free function vs. rendered string.
    let default_target = std::env::temp_dir().join(format!(
        "fp_io_html_writer_{}_{}.html",
        std::process::id(),
        line!()
    ));
    write_html(&table, &default_target).expect("write html");
    let written = std::fs::read_to_string(&default_target).expect("read html");
    let rendered = write_html_string(&table).expect("html string");
    assert_eq!(written, rendered);

    // Extension method vs. free function, both rendering to a string.
    let via_trait = table.to_html_string().expect("trait html string");
    let via_free = write_html_string(&table).expect("free html string");
    assert_eq!(via_trait, via_free);

    // Non-default options passed through the extension file writer.
    let no_index_target = std::env::temp_dir().join(format!(
        "fp_io_html_writer_no_index_{}_{}.html",
        std::process::id(),
        line!()
    ));
    let without_index = HtmlWriteOptions {
        include_index: false,
        ..HtmlWriteOptions::default()
    };
    table
        .to_html_file_with_options(&no_index_target, &without_index)
        .expect("trait html file");
    let written_without_index =
        std::fs::read_to_string(&no_index_target).expect("read trait html");
    let rendered_without_index =
        write_html_string_with_options(&table, &without_index).expect("free html options");
    assert_eq!(written_without_index, rendered_without_index);
}
12724
/// Reads the second of two tables (`table_index: 1`) and checks header
/// extraction, entity decoding, scalar typing, and missing-cell handling.
///
/// The second body row has only two cells for three headers and its second
/// cell is empty, so both `value[1]` and `flag[1]` are expected to be missing.
#[test]
fn html_reader_parses_first_table_headers_and_missing_cells() {
    // A trailing backslash drops the newline and the next line's leading
    // whitespace, so this is one unbroken run of markup.
    let markup = "<html><body>\
        <table><tr><td>ignored</td></tr></table>\
        <table>\
        <thead><tr><th>name</th><th>value</th><th>flag</th></tr></thead>\
        <tbody>\
        <tr><td>A&amp;B</td><td>1</td><td>True</td></tr>\
        <tr><td>missing</td><td></td></tr>\
        </tbody>\
        </table>\
        </body></html>";

    let options = HtmlReadOptions { table_index: 1 };
    let parsed = read_html_str_with_options(markup, &options).expect("read second table");

    let headers: Vec<&str> = parsed
        .column_names()
        .into_iter()
        .map(String::as_str)
        .collect();
    assert_eq!(headers, vec!["name", "value", "flag"]);

    // `&amp;` is decoded back to a bare ampersand.
    let name_column = parsed.column("name").expect("name");
    assert_eq!(name_column.values()[0], Scalar::Utf8("A&B".to_owned()));

    // "1" is typed as an integer; the empty cell is missing.
    let value_column = parsed.column("value").expect("value");
    assert_eq!(value_column.values()[0], Scalar::Int64(1));
    let value_column = parsed.column("value").expect("value");
    assert!(value_column.values()[1].is_missing());

    // "True" is typed as a boolean; the absent third cell becomes a null.
    let flag_column = parsed.column("flag").expect("flag");
    assert_eq!(flag_column.values()[0], Scalar::Bool(true));
    let flag_column = parsed.column("flag").expect("flag");
    let padded_with_null = matches!(flag_column.values()[1], Scalar::Null(NullKind::Null));
    assert!(padded_with_null);
}
12769
/// Feeds the default `write_html_string` output back through `read_html_str`.
///
/// The written index comes back as an ordinary leading column named
/// `"Unnamed: 0"`, followed by the original `name` and `value` columns.
#[test]
fn html_reader_roundtrips_writer_output_as_columns() {
    let original = make_table_format_dataframe();
    let markup = write_html_string(&original).expect("write html");

    let reparsed = read_html_str(&markup).expect("read writer html");

    let headers: Vec<&str> = reparsed
        .column_names()
        .into_iter()
        .map(String::as_str)
        .collect();
    assert_eq!(headers, vec!["Unnamed: 0", "name", "value"]);

    let index_column = reparsed.column("Unnamed: 0").expect("index column");
    assert_eq!(index_column.values()[0], Scalar::Utf8("r&1".to_owned()));

    let name_column = reparsed.column("name").expect("name");
    assert_eq!(name_column.values()[0], Scalar::Utf8("A|B".to_owned()));

    let value_column = reparsed.column("value").expect("value");
    assert!(value_column.values()[0].is_missing());
    let value_column = reparsed.column("value").expect("value");
    assert_eq!(value_column.values()[1], Scalar::Float64(2.0));
}
12799
/// Verifies that the path-based `read_html` and the in-memory `read_html_str`
/// produce the same column names and the same `name` column values for
/// identical markup.
///
/// The fixture is created with `create_new(true)`, which refuses to open a
/// path that already exists. Any stale fixture is therefore removed before
/// creation, and the fresh one is removed again once it has been read, so a
/// later run that reuses this process id cannot fail on a leftover file.
#[test]
fn html_reader_path_reader_matches_string_reader() {
    use std::io::Write;

    let html = "<table><tr><th>name</th></tr><tr><td>A</td></tr></table>\n";
    let path = std::env::temp_dir().join(format!(
        "fp_io_html_reader_{}_{}.html",
        std::process::id(),
        line!()
    ));

    // Best-effort: the usual outcome is "not found", which is fine to ignore.
    let _ = std::fs::remove_file(&path);

    {
        let mut file = std::fs::OpenOptions::new()
            .write(true)
            .create_new(true)
            .open(&path)
            .expect("create html fixture");
        file.write_all(html.as_bytes()).expect("write html fixture");
        // The handle is closed at the end of this scope, before the reader
        // opens the path and before the fixture is deleted.
    }

    // Delete the fixture before unwrapping so it is cleaned up even when the
    // read itself fails.
    let via_path = read_html(&path);
    let _ = std::fs::remove_file(&path);
    let via_path = via_path.expect("read path html");
    let via_str = read_html_str(html).expect("read string html");

    assert_eq!(via_path.column_names(), via_str.column_names());
    assert_eq!(
        via_path.column("name").expect("path name").values(),
        via_str.column("name").expect("str name").values()
    );
}
12826
/// Exercises three `read_html_str` failure modes: markup with no table,
/// a header row with a repeated name, and a body row wider than the header.
#[test]
fn html_reader_rejects_no_table_duplicate_headers_and_wide_rows() {
    // No <table> element at all.
    let missing_table = read_html_str("<p>no table</p>").expect_err("missing table");
    let reports_missing_table =
        matches!(missing_table, IoError::Html(message) if message.contains("no table"));
    assert!(reports_missing_table);

    // Header "a" appears twice.
    let duplicate = "<table><tr><th>a</th><th>a</th></tr><tr><td>1</td><td>2</td></tr></table>";
    let duplicate_outcome = read_html_str(duplicate);
    let reports_duplicate = matches!(
        duplicate_outcome,
        Err(IoError::DuplicateColumnName(name)) if name == "a"
    );
    assert!(reports_duplicate);

    // One header but two cells in the first body row.
    let wide = "<table><tr><th>a</th></tr><tr><td>1</td><td>2</td></tr></table>";
    let wide_row = read_html_str(wide).expect_err("wide row");
    let reports_row_zero = matches!(wide_row, IoError::Html(message) if message.contains("row 0"));
    assert!(reports_row_zero);
}
12842
/// Round-trips a frame through `write_pickle_bytes` / `read_pickle_bytes`
/// and compares the split-orient JSON rendering before and after.
#[test]
fn pickle_bytes_roundtrip_preserves_split_frame_shape() {
    let split_json = r#"{"columns":["name","value","flag"],"index":["r1","r2"],"data":[["alice",1,true],[null,2.5,false]]}"#;
    let original = read_json_str(split_json, JsonOrient::Split).expect("source frame");

    let pickled = write_pickle_bytes(&original).expect("write pickle bytes");
    assert!(!pickled.is_empty());
    let restored = read_pickle_bytes(&pickled).expect("read pickle bytes");

    let restored_json = write_json_string(&restored, JsonOrient::Split).expect("roundtrip json");
    let original_json = write_json_string(&original, JsonOrient::Split).expect("source json");
    assert_eq!(restored_json, original_json);
}
12860
/// Writes a pickle file with `write_pickle`, then checks that `read_pickle`
/// on the path and `read_pickle_bytes` on the file's raw bytes yield frames
/// with the same split-orient JSON rendering.
#[test]
fn pickle_path_reader_matches_bytes_reader() {
    let original = make_table_format_dataframe();
    let target = std::env::temp_dir().join(format!(
        "fp_io_pickle_reader_{}_{}.pkl",
        std::process::id(),
        line!()
    ));

    write_pickle(&original, &target).expect("write pickle path");

    let from_path = read_pickle(&target).expect("read pickle path");
    let raw = std::fs::read(&target).expect("read pickle bytes from path");
    let from_bytes = read_pickle_bytes(&raw).expect("read pickle bytes");

    let path_json = write_json_string(&from_path, JsonOrient::Split).expect("path json");
    let bytes_json = write_json_string(&from_bytes, JsonOrient::Split).expect("bytes json");
    assert_eq!(path_json, bytes_json);
}
12882
/// Checks that a frame pickled with `PickleProtocol::V2` through the
/// extension trait reads back unchanged, and that the default extension
/// method emits the same bytes as the free `write_pickle_bytes`.
#[test]
fn pickle_protocol_v2_and_extension_aliases_roundtrip() {
    use super::DataFrameIoExt;

    let original = make_table_format_dataframe();

    // Explicit protocol selection via the options struct.
    let v2 = PickleWriteOptions {
        protocol: PickleProtocol::V2,
    };
    let v2_bytes = original
        .to_pickle_bytes_with_options(&v2)
        .expect("trait pickle protocol v2");
    let restored = read_pickle_bytes(&v2_bytes).expect("read protocol v2");

    let restored_json = write_json_string(&restored, JsonOrient::Split).expect("roundtrip json");
    let original_json = write_json_string(&original, JsonOrient::Split).expect("source json");
    assert_eq!(restored_json, original_json);

    // Default-options alias vs. free function.
    let via_trait = original.to_pickle_bytes().expect("trait pickle bytes");
    let via_free = write_pickle_bytes(&original).expect("free pickle bytes");
    assert_eq!(via_trait, via_free);
}
12905
/// Pickles a `Series` through `SeriesIoExt` and reads it back as a frame.
///
/// The restored frame must have a single column named after the series, the
/// same index labels, and the same values. The bytes must also equal those
/// produced by pickling `to_frame(None)`, and the protocol-V2 variant must be
/// non-empty.
#[test]
fn series_pickle_extension_aliases_roundtrip_to_single_column_frame() {
    use super::SeriesIoExt;

    let sales = Series::from_values(
        "sales",
        vec!["r1".into(), "r2".into()],
        vec![Scalar::Int64(10), Scalar::Int64(12)],
    )
    .expect("source series");

    let pickled = sales.to_pickle_bytes().expect("series pickle bytes");
    let restored = read_pickle_bytes(&pickled).expect("read series pickle frame");

    let headers: Vec<&str> = restored
        .column_names()
        .into_iter()
        .map(String::as_str)
        .collect();
    assert_eq!(headers, vec!["sales"]);
    assert_eq!(restored.index().labels(), sales.index().labels());
    let restored_column = restored.column("sales").expect("sales column");
    assert_eq!(restored_column.values(), sales.values());

    // Series bytes are the same as pickling the equivalent one-column frame.
    let as_frame = sales.to_frame(None).expect("series frame");
    let series_bytes = sales.to_pickle_bytes().expect("trait pickle bytes");
    let frame_bytes = write_pickle_bytes(&as_frame).expect("frame pickle bytes");
    assert_eq!(series_bytes, frame_bytes);

    let v2 = PickleWriteOptions {
        protocol: PickleProtocol::V2,
    };
    let v2_bytes = sales
        .to_pickle_bytes_with_options(&v2)
        .expect("series pickle protocol v2");
    assert!(!v2_bytes.is_empty());
}
12947
/// Checks the `SeriesIoExt` CSV helpers: default output carries the index
/// under an empty header cell, `include_index: false` drops it, and the file
/// writer produces the same text as the default string writer.
#[test]
fn series_csv_extension_aliases_preserve_default_index() {
    use super::SeriesIoExt;

    let sales = Series::from_values(
        "sales",
        vec!["r1".into(), "r2".into()],
        vec![Scalar::Int64(10), Scalar::Int64(12)],
    )
    .expect("source series");

    let with_index = sales.to_csv_string().expect("series csv string");
    assert_eq!(with_index, ",sales\nr1,10\nr2,12\n");

    let drop_index = CsvWriteOptions {
        include_index: false,
        ..CsvWriteOptions::default()
    };
    let without_index = sales
        .to_csv_string_with_options(&drop_index)
        .expect("series csv without index");
    assert_eq!(without_index, "sales\n10\n12\n");

    let target = std::env::temp_dir().join(format!(
        "fp_io_series_csv_{}_{}.csv",
        std::process::id(),
        line!()
    ));
    sales.to_csv_file(&target).expect("series csv file");
    let written = std::fs::read_to_string(&target).expect("read series csv file");
    assert_eq!(written, with_index);
}
12981
/// Checks the `SeriesIoExt` JSON helpers for the `records`, `split`, and
/// `index` orients, including that the file writer matches `to_json`.
#[test]
fn series_json_extension_aliases_use_series_orients() {
    use super::SeriesIoExt;

    let sales = Series::from_values(
        "sales",
        vec!["r1".into(), "r2".into()],
        vec![Scalar::Int64(10), Scalar::Int64(12)],
    )
    .expect("source series");

    // `records` is a bare array of the values.
    let records = sales
        .to_json_string("records")
        .expect("series records json");
    assert_eq!(records, "[10,12]");

    // `split` carries the name, the index labels, and a flat data array.
    let split_text = sales.to_json_string("split").expect("series split json");
    let split: serde_json::Value = serde_json::from_str(&split_text).expect("parse split json");
    assert_eq!(split["name"], "sales");
    assert_eq!(split["index"], serde_json::json!(["r1", "r2"]));
    assert_eq!(split["data"], serde_json::json!([10, 12]));

    // File output for `index` equals the in-memory rendering.
    let target = std::env::temp_dir().join(format!(
        "fp_io_series_json_{}_{}.json",
        std::process::id(),
        line!()
    ));
    sales
        .to_json_file(&target, "index")
        .expect("series json file");
    let written = std::fs::read_to_string(&target).expect("read series json file");
    let rendered = sales.to_json("index").expect("series index json");
    assert_eq!(written, rendered);
}
13020
/// Checks that the `SeriesIoExt` Markdown helpers match the frame-level
/// writers applied to `to_frame(None)`, with and without explicit options,
/// and that the file writer matches the string writer.
#[test]
fn series_markdown_extension_aliases_forward_options() {
    use super::SeriesIoExt;

    let sales = Series::from_values(
        "sales",
        vec!["r1".into(), "r2".into()],
        vec![Scalar::Int64(10), Scalar::Null(NullKind::NaN)],
    )
    .expect("source series");
    let custom = MarkdownWriteOptions {
        include_index: false,
        na_rep: "NA".to_owned(),
        index_label: Some("ignored".to_owned()),
    };

    // Default options.
    let series_default = sales.to_markdown_string().expect("series markdown string");
    let frame_default = write_markdown_string(&sales.to_frame(None).expect("series frame"))
        .expect("frame markdown string");
    assert_eq!(series_default, frame_default);

    // Explicit options.
    let series_custom = sales
        .to_markdown_string_with_options(&custom)
        .expect("series markdown options");
    let frame_custom = write_markdown_string_with_options(
        &sales.to_frame(None).expect("series options frame"),
        &custom,
    )
    .expect("frame markdown options");
    assert_eq!(series_custom, frame_custom);

    // File writer with the same options.
    let target = std::env::temp_dir().join(format!(
        "fp_io_series_markdown_{}_{}.md",
        std::process::id(),
        line!()
    ));
    sales
        .to_markdown_file_with_options(&target, &custom)
        .expect("series markdown file");
    let written = std::fs::read_to_string(&target).expect("read series markdown file");
    let rendered = sales
        .to_markdown_string_with_options(&custom)
        .expect("series markdown options string");
    assert_eq!(written, rendered);
}
13068
/// Checks that the `SeriesIoExt` LaTeX helpers match the frame-level writers
/// applied to `to_frame(None)`, with and without explicit options, and that
/// the file writer matches the string writer.
#[test]
fn series_latex_extension_aliases_forward_options() {
    use super::SeriesIoExt;

    // Both the name and a value contain `&`.
    let sales = Series::from_values(
        "sales&tax",
        vec!["r1".into(), "r2".into()],
        vec![Scalar::Utf8("a&b".into()), Scalar::Null(NullKind::NaN)],
    )
    .expect("source series");
    let custom = LatexWriteOptions {
        include_index: false,
        na_rep: "NA".to_owned(),
        index_label: Some("ignored".to_owned()),
        escape: true,
    };

    // Default options.
    let series_default = sales.to_latex_string().expect("series latex string");
    let frame_default = write_latex_string(&sales.to_frame(None).expect("series frame"))
        .expect("frame latex string");
    assert_eq!(series_default, frame_default);

    // Explicit options.
    let series_custom = sales
        .to_latex_string_with_options(&custom)
        .expect("series latex options");
    let frame_custom = write_latex_string_with_options(
        &sales.to_frame(None).expect("series options frame"),
        &custom,
    )
    .expect("frame latex options");
    assert_eq!(series_custom, frame_custom);

    // File writer with the same options.
    let target = std::env::temp_dir().join(format!(
        "fp_io_series_latex_{}_{}.tex",
        std::process::id(),
        line!()
    ));
    sales
        .to_latex_file_with_options(&target, &custom)
        .expect("series latex file");
    let written = std::fs::read_to_string(&target).expect("read series latex file");
    let rendered = sales
        .to_latex_string_with_options(&custom)
        .expect("series latex options string");
    assert_eq!(written, rendered);
}
13117
/// Writes a `Series` to HDF5 through each `SeriesIoExt` entry point (explicit
/// key, default key, options struct) and checks that every file reads back
/// equal to `to_frame(None)`.
#[cfg(feature = "hdf5")]
#[test]
fn series_hdf5_extension_aliases_roundtrip_to_single_column_frame() {
    use super::SeriesIoExt;

    let sales = Series::from_values(
        "sales",
        vec!["r1".into(), "r2".into()],
        vec![Scalar::Int64(10), Scalar::Int64(12)],
    )
    .expect("source series");
    let expected = sales.to_frame(None).expect("series frame");

    // Explicit key.
    let key_target = std::env::temp_dir().join(format!(
        "fp_io_series_hdf5_key_{}_{}.h5",
        std::process::id(),
        line!()
    ));
    sales
        .to_hdf_key(&key_target, "series/data")
        .expect("series hdf key");
    let keyed = read_hdf_key(&key_target, "series/data").expect("read series hdf key");
    assert!(keyed.equals(&expected));

    // Default key on both the write and the read side.
    let default_target = std::env::temp_dir().join(format!(
        "fp_io_series_hdf5_default_{}_{}.h5",
        std::process::id(),
        line!()
    ));
    sales
        .to_hdf_file(&default_target)
        .expect("series hdf default key");
    let defaulted = read_hdf(&default_target).expect("read series hdf default");
    assert!(defaulted.equals(&expected));

    // Key supplied through the options struct.
    let options_target = std::env::temp_dir().join(format!(
        "fp_io_series_hdf5_options_{}_{}.h5",
        std::process::id(),
        line!()
    ));
    let keyed_options = HdfWriteOptions {
        key: "series/options".to_owned(),
    };
    sales
        .to_hdf_with_options(&options_target, &keyed_options)
        .expect("series hdf options");
    let from_options =
        read_hdf_key(&options_target, "series/options").expect("read series hdf options");
    assert!(from_options.equals(&expected));
}
13178
/// Writes a `Series` to Excel bytes through `SeriesIoExt` and reads it back.
///
/// With default options the index comes back as a leading `column_0` column
/// next to `sales`. With `index: false` only `sales` is present. The default
/// bytes must equal those of writing `to_frame(None)`.
#[test]
fn series_excel_extension_aliases_roundtrip_to_single_column_frame() {
    use super::SeriesIoExt;

    let sales = Series::from_values(
        "sales",
        vec!["r1".into(), "r2".into()],
        vec![Scalar::Int64(10), Scalar::Int64(12)],
    )
    .expect("source series");

    // Default options: the index is written and read back as `column_0`.
    let workbook = sales.to_excel_bytes().expect("series excel bytes");
    let restored =
        read_excel_bytes(&workbook, &ExcelReadOptions::default()).expect("read series excel");
    let headers: Vec<&str> = restored
        .column_names()
        .into_iter()
        .map(String::as_str)
        .collect();
    assert_eq!(headers, vec!["column_0", "sales"]);

    let expected_labels = [Scalar::Utf8("r1".into()), Scalar::Utf8("r2".into())];
    let label_column = restored.column("column_0").expect("index column");
    assert_eq!(label_column.values(), &expected_labels);
    let sales_column = restored.column("sales").expect("sales column");
    assert_eq!(sales_column.values(), sales.values());

    // Series bytes are the same as writing the equivalent one-column frame.
    let as_frame = sales.to_frame(None).expect("series frame");
    let series_bytes = sales.to_excel_bytes().expect("trait excel bytes");
    let frame_bytes = write_excel_bytes(&as_frame).expect("frame excel bytes");
    assert_eq!(series_bytes, frame_bytes);

    // `index: false` leaves only the data column.
    let drop_index = ExcelWriteOptions {
        index: false,
        ..ExcelWriteOptions::default()
    };
    let bare_workbook = sales
        .to_excel_bytes_with_options(&drop_index)
        .expect("series excel index false");
    let bare = read_excel_bytes(&bare_workbook, &ExcelReadOptions::default())
        .expect("read no-index series excel");
    assert_eq!(bare.column_names(), vec!["sales"]);
    assert_eq!(bare.index().len(), sales.index().len());
}
13226
/// Checks that `read_pickle_bytes` returns `IoError::Pickle` for bytes that
/// are not a pickle, and for a valid pickle that was not produced by this
/// crate's writer (the error message must mention "format marker").
#[test]
fn pickle_reader_rejects_malformed_and_foreign_payloads() {
    let garbage = read_pickle_bytes(b"not a pickle").expect_err("malformed pickle");
    assert!(matches!(garbage, IoError::Pickle(_)));

    // A well-formed pickle built directly with serde_pickle.
    let lookalike = serde_json::json!({"payload": {"columns": [], "index": [], "data": []}});
    let foreign =
        serde_pickle::to_vec(&lookalike, serde_pickle::SerOptions::new()).expect("foreign pickle");
    let rejected = read_pickle_bytes(&foreign).expect_err("foreign pickle");
    let mentions_marker = matches!(
        rejected,
        IoError::Pickle(message) if message.contains("format marker")
    );
    assert!(mentions_marker);
}
13243
/// Round-trips a frame through `write_hdf` / `read_hdf` (default key) and
/// compares the split-orient JSON rendering before and after.
#[cfg(feature = "hdf5")]
#[test]
fn hdf5_path_roundtrip_preserves_snapshot_frame() {
    let original = make_table_format_dataframe();
    let target = std::env::temp_dir().join(format!(
        "fp_io_hdf5_default_{}_{}.h5",
        std::process::id(),
        line!()
    ));

    write_hdf(&original, &target).expect("write hdf default key");
    let restored = read_hdf(&target).expect("read hdf default key");

    let restored_json = write_json_string(&restored, JsonOrient::Split).expect("roundtrip json");
    let original_json = write_json_string(&original, JsonOrient::Split).expect("source json");
    assert_eq!(restored_json, original_json);
}
13262
/// Round-trips a frame through HDF5 using a custom key with the free
/// functions, a custom key with the extension trait, and the default key
/// with the extension trait.
///
/// The free-function case writes under `tables/snapshot` and reads under
/// `/tables/snapshot/`, so the two spellings must address the same payload.
#[cfg(feature = "hdf5")]
#[test]
fn hdf5_custom_key_and_extension_aliases_roundtrip() {
    use super::DataFrameIoExt;

    let original = make_test_dataframe();
    let free_target = std::env::temp_dir().join(format!(
        "fp_io_hdf5_custom_free_{}_{}.h5",
        std::process::id(),
        line!()
    ));
    let trait_target = std::env::temp_dir().join(format!(
        "fp_io_hdf5_custom_trait_{}_{}.h5",
        std::process::id(),
        line!()
    ));
    let default_target = std::env::temp_dir().join(format!(
        "fp_io_hdf5_custom_default_{}_{}.h5",
        std::process::id(),
        line!()
    ));
    let snapshot_key = HdfWriteOptions {
        key: "tables/snapshot".to_owned(),
    };

    // Free functions: write without slashes, read with leading and trailing ones.
    write_hdf_with_options(&original, &free_target, &snapshot_key).expect("write custom key");
    let slashed_key = HdfReadOptions {
        key: "/tables/snapshot/".to_owned(),
    };
    let via_free = read_hdf_with_options(&free_target, &slashed_key)
        .expect("read custom key with slash aliases");
    assert!(via_free.equals(&original));

    // Extension trait with an explicit key.
    original
        .to_hdf_key(&trait_target, "nested/frame")
        .expect("trait hdf key");
    let via_trait_key = read_hdf_key(&trait_target, "nested/frame").expect("read trait hdf key");
    assert!(via_trait_key.equals(&original));

    // Extension trait with the default key.
    original
        .to_hdf_file(&default_target)
        .expect("trait hdf default key");
    let via_trait_default = read_hdf(&default_target).expect("read trait hdf default");
    assert!(via_trait_default.equals(&original));
}
13316
/// Round-trips a frame with a row multi-index through HDF5 and checks that
/// the multi-index is restored rather than surfacing as a data column.
///
/// The restored frame must equal the source, must not contain a
/// `__index_level_0__` column, and must have the same level-0 labels.
#[cfg(feature = "hdf5")]
#[test]
fn hdf5_row_multiindex_roundtrip_restores_logical_row_axis() {
    let original = make_row_multiindex_test_dataframe();
    let target = std::env::temp_dir().join(format!(
        "fp_io_hdf5_multiindex_{}_{}.h5",
        std::process::id(),
        line!()
    ));

    write_hdf_key(&original, &target, "axes/frame").expect("write hdf multiindex");
    let restored = read_hdf_key(&target, "axes/frame").expect("read hdf multiindex");

    assert!(restored.equals(&original));
    assert!(restored.column("__index_level_0__").is_none());

    // Compare the outermost level of both row axes.
    let restored_axis = restored
        .row_multiindex()
        .expect("row multiindex should be restored");
    let restored_level = restored_axis.get_level_values(0).unwrap();
    let original_axis = original.row_multiindex().expect("source row multiindex");
    let original_level = original_axis.get_level_values(0).unwrap();
    assert_eq!(restored_level.labels(), original_level.labels());
}
13347
/// Checks two HDF5 failure modes: reading a file whose `frame` group is
/// empty, and writing under the key `../bad`.
#[cfg(feature = "hdf5")]
#[test]
fn hdf5_reader_rejects_invalid_keys_and_missing_payloads() {
    let original = make_test_dataframe();
    let target = std::env::temp_dir().join(format!(
        "fp_io_hdf5_missing_payload_{}_{}.h5",
        std::process::id(),
        line!()
    ));

    // Build a file that has the `frame` group but nothing inside it.
    {
        let shell = hdf5::File::create(&target).expect("create hdf shell");
        shell
            .create_group("frame")
            .expect("create empty frame group");
        shell.flush().expect("flush hdf shell");
        // `shell` is dropped here, before the reader opens the file.
    }

    let missing_payload = read_hdf(&target).expect_err("missing payload should fail");
    let reports_missing_payload = matches!(
        missing_payload,
        IoError::Hdf5(message) if message.contains("missing FrankenPandas payload dataset")
    );
    assert!(reports_missing_payload);

    let bad_key =
        write_hdf_key(&original, &target, "../bad").expect_err("invalid key should fail");
    let reports_bad_key = matches!(
        bad_key,
        IoError::Hdf5(message) if message.contains("invalid hdf5 key")
    );
    assert!(reports_bad_key);
}
13376
13377    fn make_stata_dataframe() -> DataFrame {
13378        let mut columns = BTreeMap::new();
13379        columns.insert(
13380            "id".to_owned(),
13381            Column::from_values(vec![Scalar::Int64(1), Scalar::Int64(2), Scalar::Int64(3)])
13382                .expect("id column"),
13383        );
13384        columns.insert(
13385            "score".to_owned(),
13386            Column::from_values(vec![
13387                Scalar::Float64(1.5),
13388                Scalar::Null(NullKind::NaN),
13389                Scalar::Float64(3.25),
13390            ])
13391            .expect("score column"),
13392        );
13393        columns.insert(
13394            "flag".to_owned(),
13395            Column::from_values(vec![
13396                Scalar::Bool(true),
13397                Scalar::Bool(false),
13398                Scalar::Bool(true),
13399            ])
13400            .expect("flag column"),
13401        );
13402        columns.insert(
13403            "label".to_owned(),
13404            Column::from_values(vec![
13405                Scalar::Utf8("alpha".to_owned()),
13406                Scalar::Utf8("beta".to_owned()),
13407                Scalar::Utf8("gamma".to_owned()),
13408            ])
13409            .expect("label column"),
13410        );
13411
13412        DataFrame::new_with_column_order(
13413            Index::new(vec![
13414                IndexLabel::Utf8("row_a".to_owned()),
13415                IndexLabel::Utf8("row_b".to_owned()),
13416                IndexLabel::Utf8("row_c".to_owned()),
13417            ]),
13418            columns,
13419            vec![
13420                "id".to_owned(),
13421                "score".to_owned(),
13422                "flag".to_owned(),
13423                "label".to_owned(),
13424            ],
13425        )
13426        .expect("stata frame")
13427    }
13428
/// Round-trips the Stata fixture through `write_stata_bytes` /
/// `read_stata_bytes` and checks every resulting column.
///
/// The index is expected back as a leading `index` column, and the boolean
/// `flag` column is expected back as integers 1/0/1.
#[test]
fn stata_bytes_roundtrip_preserves_supported_columns() {
    let original = make_stata_dataframe();
    let encoded = write_stata_bytes(&original).expect("write stata bytes");
    assert!(!encoded.is_empty());

    let restored = read_stata_bytes(&encoded).expect("read stata bytes");

    let headers: Vec<&str> = restored
        .column_names()
        .into_iter()
        .map(String::as_str)
        .collect();
    assert_eq!(headers, vec!["index", "id", "score", "flag", "label"]);

    // Expected contents per column, in the same order as the headers.
    let expected: Vec<(&str, Vec<Scalar>)> = vec![
        (
            "index",
            vec![
                Scalar::Utf8("row_a".to_owned()),
                Scalar::Utf8("row_b".to_owned()),
                Scalar::Utf8("row_c".to_owned()),
            ],
        ),
        (
            "id",
            vec![Scalar::Int64(1), Scalar::Int64(2), Scalar::Int64(3)],
        ),
        (
            "score",
            vec![
                Scalar::Float64(1.5),
                Scalar::Null(NullKind::NaN),
                Scalar::Float64(3.25),
            ],
        ),
        (
            "flag",
            vec![Scalar::Int64(1), Scalar::Int64(0), Scalar::Int64(1)],
        ),
        (
            "label",
            vec![
                Scalar::Utf8("alpha".to_owned()),
                Scalar::Utf8("beta".to_owned()),
                Scalar::Utf8("gamma".to_owned()),
            ],
        ),
    ];
    for (name, want) in expected {
        // The column name doubles as the panic context.
        let column = restored.column(name).expect(name);
        assert_eq!(column.values(), want.as_slice());
    }
}
13478
/// Writes a Stata file with `write_stata`, then checks that `read_stata` on
/// the path and `read_stata_bytes` on the file's raw bytes agree on column
/// names and on every column's values.
#[test]
fn stata_path_reader_matches_bytes_reader() {
    let original = make_stata_dataframe();
    let target = std::env::temp_dir().join(format!(
        "fp_io_stata_reader_{}_{}.dta",
        std::process::id(),
        line!()
    ));

    write_stata(&original, &target).expect("write stata path");

    let from_path = read_stata(&target).expect("read stata path");
    let raw = std::fs::read(&target).expect("read stata bytes from path");
    let from_bytes = read_stata_bytes(&raw).expect("read stata bytes");

    assert_eq!(from_path.column_names(), from_bytes.column_names());
    for name in from_path.column_names() {
        let path_column = from_path.column(name).expect("path column");
        let bytes_column = from_bytes.column(name).expect("bytes column");
        assert_eq!(path_column.values(), bytes_column.values());
    }
}
13503
/// Checks the `DataFrameIoExt` Stata helpers.
///
/// With `include_index: false` no index column appears in the output, for
/// both the bytes writer and the path writer, even though an `index_label`
/// is supplied. The default extension method must emit the same bytes as the
/// free `write_stata_bytes`.
#[test]
fn stata_extension_aliases_and_no_index_option_roundtrip() {
    use super::DataFrameIoExt;

    let original = make_stata_dataframe();
    let drop_index = StataWriteOptions {
        include_index: false,
        index_label: Some("ignored".to_owned()),
    };

    // Bytes writer without the index.
    let encoded = original
        .to_stata_bytes_with_options(&drop_index)
        .expect("trait stata bytes without index");
    let from_bytes = read_stata_bytes(&encoded).expect("read no-index stata");
    let headers: Vec<&str> = from_bytes
        .column_names()
        .into_iter()
        .map(String::as_str)
        .collect();
    assert_eq!(headers, vec!["id", "score", "flag", "label"]);

    // Path writer with the same options yields the same columns.
    let target = std::env::temp_dir().join(format!(
        "fp_io_stata_trait_{}_{}.dta",
        std::process::id(),
        line!()
    ));
    original
        .to_stata_with_options(&target, &drop_index)
        .expect("trait stata path without index");
    let from_path = read_stata(&target).expect("read trait stata path");
    assert_eq!(from_path.column_names(), from_bytes.column_names());

    // Default-options alias vs. free function.
    let via_trait = original.to_stata_bytes().expect("trait stata bytes");
    let via_free = write_stata_bytes(&original).expect("free stata bytes");
    assert_eq!(via_trait, via_free);
}
13543
13544    #[test]
13545    fn stata_writer_rejects_invalid_variable_names_and_malformed_input() {
13546        let mut columns = BTreeMap::new();
13547        columns.insert(
13548            "bad-name".to_owned(),
13549            Column::from_values(vec![Scalar::Int64(1)]).expect("bad column"),
13550        );
13551        let frame = DataFrame::new_with_column_order(
13552            Index::from_i64(vec![0]),
13553            columns,
13554            vec!["bad-name".to_owned()],
13555        )
13556        .expect("frame with invalid stata column");
13557
13558        let err = write_stata_bytes(&frame).expect_err("invalid stata variable name");
13559        assert!(matches!(
13560            err,
13561            IoError::Stata(message) if message.contains("invalid Stata variable name")
13562        ));
13563
13564        let source = make_stata_dataframe();
13565        let err = write_stata_bytes_with_options(
13566            &source,
13567            &StataWriteOptions {
13568                include_index: true,
13569                index_label: Some("1bad".to_owned()),
13570            },
13571        )
13572        .expect_err("invalid index variable name");
13573        assert!(matches!(
13574            err,
13575            IoError::Stata(message) if message.contains("first character")
13576        ));
13577
13578        let err = read_stata_bytes(b"not a dta").expect_err("malformed stata");
13579        assert!(matches!(err, IoError::Stata(_)));
13580    }
13581
13582    #[test]
13583    fn xml_writer_defaults_to_index_and_escapes_values() {
13584        let frame = make_table_format_dataframe();
13585
13586        let out = write_xml_string(&frame).expect("xml");
13587
13588        assert_eq!(
13589            out,
13590            concat!(
13591                "<?xml version=\"1.0\" encoding=\"utf-8\"?>\n",
13592                "<data>\n",
13593                "  <row>\n",
13594                "    <row>r&amp;1</row>\n",
13595                "    <name>A|B</name>\n",
13596                "    <value/>\n",
13597                "  </row>\n",
13598                "  <row>\n",
13599                "    <row>r_2</row>\n",
13600                "    <name>under_score</name>\n",
13601                "    <value>2.0</value>\n",
13602                "  </row>\n",
13603                "</data>\n",
13604            )
13605        );
13606    }
13607
13608    #[test]
13609    fn xml_writer_options_can_omit_index_and_reject_bad_names() {
13610        let frame = make_table_format_dataframe();
13611
13612        let out = write_xml_string_with_options(
13613            &frame,
13614            &XmlWriteOptions {
13615                include_index: false,
13616                root_name: "records".to_owned(),
13617                row_name: "entry".to_owned(),
13618                index_label: Some("ignored".to_owned()),
13619            },
13620        )
13621        .expect("xml");
13622
13623        assert_eq!(
13624            out,
13625            concat!(
13626                "<?xml version=\"1.0\" encoding=\"utf-8\"?>\n",
13627                "<records>\n",
13628                "  <entry>\n",
13629                "    <name>A|B</name>\n",
13630                "    <value/>\n",
13631                "  </entry>\n",
13632                "  <entry>\n",
13633                "    <name>under_score</name>\n",
13634                "    <value>2.0</value>\n",
13635                "  </entry>\n",
13636                "</records>\n",
13637            )
13638        );
13639
13640        let err = write_xml_string_with_options(
13641            &frame,
13642            &XmlWriteOptions {
13643                root_name: "bad name".to_owned(),
13644                ..Default::default()
13645            },
13646        )
13647        .expect_err("invalid xml name");
13648        assert!(matches!(err, IoError::Xml(message) if message.contains("bad name")));
13649    }
13650
13651    #[test]
13652    fn xml_writer_escapes_text_like_pandas_etree() {
13653        let mut columns = BTreeMap::new();
13654        columns.insert(
13655            "name".to_owned(),
13656            Column::from_values(vec![Scalar::Utf8(
13657                "A&B <tag> \"quote\" it's\r\nnext".to_owned(),
13658            )])
13659            .expect("name column"),
13660        );
13661        let frame = DataFrame::new_with_column_order(
13662            Index::new(vec![IndexLabel::Utf8("idx".to_owned())]),
13663            columns,
13664            vec!["name".to_owned()],
13665        )
13666        .expect("xml escape frame");
13667
13668        assert_eq!(
13669            write_xml_string_with_options(
13670                &frame,
13671                &XmlWriteOptions {
13672                    include_index: false,
13673                    ..Default::default()
13674                },
13675            )
13676            .expect("xml"),
13677            concat!(
13678                "<?xml version=\"1.0\" encoding=\"utf-8\"?>\n",
13679                "<data>\n",
13680                "  <row>\n",
13681                "    <name>A&amp;B &lt;tag&gt; \"quote\" it's\n",
13682                "next</name>\n",
13683                "  </row>\n",
13684                "</data>\n",
13685            )
13686        );
13687    }
13688
13689    #[test]
13690    fn xml_writer_file_output_and_extension_aliases_match_free_functions() {
13691        use super::DataFrameIoExt;
13692
13693        let frame = make_table_format_dataframe();
13694        let path = std::env::temp_dir().join(format!(
13695            "fp_io_xml_writer_{}_{}.xml",
13696            std::process::id(),
13697            line!()
13698        ));
13699
13700        write_xml(&frame, &path).expect("write xml");
13701        assert_eq!(
13702            std::fs::read_to_string(&path).expect("read xml"),
13703            write_xml_string(&frame).expect("xml string")
13704        );
13705        assert_eq!(
13706            frame.to_xml_string().expect("trait xml string"),
13707            write_xml_string(&frame).expect("free xml string")
13708        );
13709
13710        let trait_path = std::env::temp_dir().join(format!(
13711            "fp_io_xml_writer_trait_alias_{}_{}.xml",
13712            std::process::id(),
13713            line!()
13714        ));
13715        frame.to_xml(&trait_path).expect("trait xml alias");
13716        assert_eq!(
13717            std::fs::read_to_string(&trait_path).expect("read trait xml alias"),
13718            write_xml_string(&frame).expect("free xml string")
13719        );
13720
13721        let no_index_options = XmlWriteOptions {
13722            include_index: false,
13723            ..Default::default()
13724        };
13725        let no_index_path = std::env::temp_dir().join(format!(
13726            "fp_io_xml_writer_no_index_{}_{}.xml",
13727            std::process::id(),
13728            line!()
13729        ));
13730        frame
13731            .to_xml_file_with_options(&no_index_path, &no_index_options)
13732            .expect("trait xml file");
13733        assert_eq!(
13734            std::fs::read_to_string(&no_index_path).expect("read trait xml"),
13735            write_xml_string_with_options(&frame, &no_index_options).expect("free xml options")
13736        );
13737    }
13738
13739    #[test]
13740    fn xml_reader_parses_pandas_row_shape_and_empty_values() {
13741        let xml = concat!(
13742            "<?xml version=\"1.0\" encoding=\"utf-8\"?>\n",
13743            "<data>\n",
13744            "  <row>\n",
13745            "    <index>0</index>\n",
13746            "    <a>1</a>\n",
13747            "    <b/>\n",
13748            "  </row>\n",
13749            "  <row>\n",
13750            "    <index>1</index>\n",
13751            "    <a>2.5</a>\n",
13752            "    <b>x</b>\n",
13753            "  </row>\n",
13754            "</data>\n",
13755        );
13756
13757        let frame = read_xml_str(xml).expect("read xml");
13758
13759        assert_eq!(
13760            frame
13761                .column_names()
13762                .into_iter()
13763                .map(String::as_str)
13764                .collect::<Vec<_>>(),
13765            vec!["index", "a", "b"]
13766        );
13767        assert_eq!(
13768            frame.index().labels(),
13769            &[IndexLabel::Int64(0), IndexLabel::Int64(1)]
13770        );
13771        assert_eq!(
13772            frame.column("index").expect("index").values()[0],
13773            Scalar::Int64(0)
13774        );
13775        assert_eq!(
13776            frame.column("a").expect("a").values()[1],
13777            Scalar::Float64(2.5)
13778        );
13779        assert!(matches!(
13780            frame.column("b").expect("b").values()[0],
13781            Scalar::Null(NullKind::Null)
13782        ));
13783        assert_eq!(
13784            frame.column("b").expect("b").values()[1],
13785            Scalar::Utf8("x".to_owned())
13786        );
13787    }
13788
13789    #[test]
13790    fn xml_reader_roundtrips_writer_output_as_columns() {
13791        let source = make_table_format_dataframe();
13792        let xml = write_xml_string(&source).expect("write xml");
13793
13794        let frame = read_xml_str(&xml).expect("read writer xml");
13795
13796        assert_eq!(
13797            frame
13798                .column_names()
13799                .into_iter()
13800                .map(String::as_str)
13801                .collect::<Vec<_>>(),
13802            vec!["row", "name", "value"]
13803        );
13804        assert_eq!(
13805            frame.column("row").expect("row").values()[0],
13806            Scalar::Utf8("r&1".to_owned())
13807        );
13808        assert_eq!(
13809            frame.column("name").expect("name").values()[0],
13810            Scalar::Utf8("A|B".to_owned())
13811        );
13812        assert!(frame.column("value").expect("value").values()[0].is_missing());
13813        assert_eq!(
13814            frame.column("value").expect("value").values()[1],
13815            Scalar::Float64(2.0)
13816        );
13817    }
13818
13819    #[test]
13820    fn xml_reader_unescapes_text_and_supports_custom_row_names() {
13821        let xml = concat!(
13822            "<records>\n",
13823            "  <entry><name>A&amp;B &lt;tag&gt; \"quote\" it's</name><flag>True</flag></entry>\n",
13824            "  <entry><name>line\n",
13825            "next</name><flag>false</flag></entry>\n",
13826            "</records>\n",
13827        );
13828
13829        let frame = read_xml_str_with_options(
13830            xml,
13831            &XmlReadOptions {
13832                row_name: "entry".to_owned(),
13833            },
13834        )
13835        .expect("read custom xml");
13836
13837        assert_eq!(
13838            frame.column("name").expect("name").values()[0],
13839            Scalar::Utf8("A&B <tag> \"quote\" it's".to_owned())
13840        );
13841        assert_eq!(
13842            frame.column("name").expect("name").values()[1],
13843            Scalar::Utf8("line\nnext".to_owned())
13844        );
13845        assert_eq!(
13846            frame.column("flag").expect("flag").values()[0],
13847            Scalar::Bool(true)
13848        );
13849        assert_eq!(
13850            frame.column("flag").expect("flag").values()[1],
13851            Scalar::Bool(false)
13852        );
13853    }
13854
13855    #[test]
13856    fn xml_reader_path_reader_matches_string_reader() {
13857        use std::io::Write;
13858
13859        let xml = "<data><row><name>A</name></row></data>\n";
13860        let path = std::env::temp_dir().join(format!(
13861            "fp_io_xml_reader_{}_{}.xml",
13862            std::process::id(),
13863            line!()
13864        ));
13865        let mut file = std::fs::OpenOptions::new()
13866            .write(true)
13867            .create_new(true)
13868            .open(&path)
13869            .expect("create xml fixture");
13870        file.write_all(xml.as_bytes()).expect("write xml fixture");
13871
13872        let via_path = read_xml(&path).expect("read path xml");
13873        let via_str = read_xml_str(xml).expect("read string xml");
13874
13875        assert_eq!(via_path.column_names(), via_str.column_names());
13876        assert_eq!(
13877            via_path.column("name").expect("path name").values(),
13878            via_str.column("name").expect("str name").values()
13879        );
13880    }
13881
13882    #[test]
13883    fn xml_reader_rejects_malformed_nested_and_duplicate_fields() {
13884        let malformed = "<data><row><name>A</row></data>";
13885        assert!(matches!(read_xml_str(malformed), Err(IoError::Xml(_))));
13886
13887        let nested = "<data><row><name><inner>A</inner></name></row></data>";
13888        let err = read_xml_str(nested).expect_err("nested field error");
13889        assert!(matches!(err, IoError::Xml(message) if message.contains("nested xml element")));
13890
13891        let duplicate = "<data><row><name>A</name><name>B</name></row></data>";
13892        let err = read_xml_str(duplicate).expect_err("duplicate field error");
13893        assert!(matches!(err, IoError::Xml(message) if message.contains("duplicate xml field")));
13894    }
13895
13896    // === AG-07-T: CSV Parser Optimization Tests ===
13897
13898    #[test]
13899    fn test_csv_vec_based_column_order() {
13900        // Verify Vec-based parser preserves header-to-data mapping exactly.
13901        // BTreeMap sorts alphabetically, so we use alpha-ordered headers.
13902        let input = "alpha,bravo,charlie\n1,2,3\n4,5,6\n";
13903        let frame = read_csv_str(input).expect("parse");
13904        let keys: Vec<&String> = frame.columns().keys().collect();
13905        assert_eq!(keys, &["alpha", "bravo", "charlie"]);
13906        assert_eq!(frame.column("alpha").unwrap().values()[0], Scalar::Int64(1));
13907        assert_eq!(frame.column("bravo").unwrap().values()[0], Scalar::Int64(2));
13908        assert_eq!(
13909            frame.column("charlie").unwrap().values()[1],
13910            Scalar::Int64(6)
13911        );
13912        eprintln!("[TEST] test_csv_vec_based_column_order | rows=2 cols=3 parse_ok=true | PASS");
13913    }
13914
13915    #[test]
13916    fn read_csv_object_fallback_preserves_original_text() {
13917        // pandas does column-level inference: when a column falls back to object
13918        // dtype, every non-NA cell keeps its VERBATIM source text. A bool-like
13919        // token in non-canonical case ("true") or a zero-padded number ("01")
13920        // must survive unchanged once the column is object. Verified vs live
13921        // pandas 2.2.3: read_csv('c\ntrue\nfalse\nmaybe\n')['c'] is
13922        // ['true','false','maybe'] (object), and '01'/'02' stay '01'/'02'.
13923        let cases: &[(&str, &str)] = &[
13924            // Uniform native columns are unaffected.
13925            ("c\n1\n2\n3\n", "c\n1\n2\n3\n"),
13926            ("c\ntrue\nfalse\n", "c\nTrue\nFalse\n"), // pure bool -> bool dtype
13927            // Object fallbacks keep original literals.
13928            ("c\n1\n2\nabc\n", "c\n1\n2\nabc\n"),
13929            ("c\ntrue\nfalse\nmaybe\n", "c\ntrue\nfalse\nmaybe\n"),
13930            ("c\nTrue\nFalse\nmaybe\n", "c\nTrue\nFalse\nmaybe\n"),
13931            ("c\n01\n02\nabc\n", "c\n01\n02\nabc\n"),
13932        ];
13933        for (input, expected_csv) in cases {
13934            let frame = read_csv_str(input).expect("read");
13935            let out = write_csv_string(&frame).expect("write");
13936            assert_eq!(
13937                &out, expected_csv,
13938                "round-trip mismatch for input {input:?}"
13939            );
13940        }
13941    }
13942
13943    #[test]
13944    fn read_csv_typed_numeric_fast_path_promotes_in_one_pass() {
13945        let input = "i,f\n1,0\n2,0.5\n3,1.25\n";
13946        let headers = vec!["i".to_owned(), "f".to_owned()];
13947
13948        let fast = super::try_read_csv_str_typed_numeric(input, &headers)
13949            .expect("typed parse")
13950            .expect("numeric fast path");
13951        assert_eq!(fast.column("i").expect("i").dtype(), DType::Int64);
13952        assert_eq!(fast.column("f").expect("f").dtype(), DType::Float64);
13953        assert_eq!(fast.column("i").expect("i").values()[2], Scalar::Int64(3));
13954        assert_eq!(
13955            fast.column("f").expect("f").values()[2],
13956            Scalar::Float64(1.25)
13957        );
13958
13959        let object_input = "c\n01\nabc\n";
13960        let object_headers = vec!["c".to_owned()];
13961        assert!(
13962            super::try_read_csv_str_typed_numeric(object_input, &object_headers)
13963                .expect("object probe")
13964                .is_none()
13965        );
13966        let object_frame = read_csv_str(object_input).expect("fallback read");
13967        assert_eq!(object_frame.index().int64_unit_range_labels(), Some((0, 2)));
13968        assert_eq!(
13969            write_csv_string(&object_frame).expect("fallback write"),
13970            object_input
13971        );
13972    }
13973
13974    #[test]
13975    fn read_csv_simple_typed_numeric_fast_path_builds_typed_columns() {
13976        let input = "i,f\n1,0\n2,0.5\n3,1.25\n";
13977        let headers = vec!["i".to_owned(), "f".to_owned()];
13978
13979        let fast = super::try_read_csv_str_simple_typed_numeric(input, &headers)
13980            .expect("simple typed parse")
13981            .expect("simple numeric fast path");
13982
13983        assert_eq!(fast.column("i").expect("i").dtype(), DType::Int64);
13984        assert_eq!(fast.column("f").expect("f").dtype(), DType::Float64);
13985        assert_eq!(fast.index().int64_unit_range_labels(), Some((0, 3)));
13986        assert_eq!(fast.index().labels()[2], IndexLabel::Int64(2));
13987        assert_eq!(fast.column("i").expect("i").values()[2], Scalar::Int64(3));
13988        assert_eq!(
13989            fast.column("f").expect("f").values()[2],
13990            Scalar::Float64(1.25)
13991        );
13992    }
13993
13994    #[test]
13995    fn read_csv_simple_typed_numeric_fast_path_rejects_quoted_fields() {
13996        let input = "x\n\"1.5\"\n";
13997        let headers = vec!["x".to_owned()];
13998
13999        assert!(
14000            super::try_read_csv_str_simple_typed_numeric(input, &headers)
14001                .expect("simple probe")
14002                .is_none()
14003        );
14004
14005        let frame = read_csv_str(input).expect("fallback read");
14006        assert_eq!(frame.column("x").expect("x").dtype(), DType::Float64);
14007        assert_eq!(
14008            frame.column("x").expect("x").values(),
14009            &[Scalar::Float64(1.5)]
14010        );
14011    }
14012
14013    #[test]
14014    fn read_csv_simple_typed_numeric_fast_path_accepts_crlf_rows() {
14015        let input = "i,f\n1,0\r\n2,0.5\r\n";
14016        let headers = vec!["i".to_owned(), "f".to_owned()];
14017
14018        let fast = super::try_read_csv_str_simple_typed_numeric(input, &headers)
14019            .expect("simple typed parse")
14020            .expect("simple numeric fast path");
14021
14022        assert_eq!(fast.len(), 2);
14023        assert_eq!(fast.column("i").expect("i").values()[1], Scalar::Int64(2));
14024        assert_eq!(
14025            fast.column("f").expect("f").values()[1],
14026            Scalar::Float64(0.5)
14027        );
14028    }
14029
14030    #[test]
14031    fn read_csv_simple_parallel_chunks_preserve_order_and_promotion() {
14032        use std::fmt::Write as _;
14033
14034        let headers = vec!["i".to_owned(), "f".to_owned()];
14035        let mut data = String::new();
14036        for row in 0..32 {
14037            let float_value = row as f64 * 0.25;
14038            writeln!(data, "{row},{float_value}").expect("write row");
14039        }
14040
14041        let (columns, row_count) =
14042            super::parse_simple_numeric_csv_parallel_chunks(data.as_bytes(), headers.len(), 3)
14043                .expect("parallel chunk parse");
14044        let frame = super::build_typed_numeric_csv_frame(&headers, columns, row_count)
14045            .expect("frame build");
14046
14047        assert_eq!(frame.len(), 32);
14048        assert_eq!(frame.column("i").expect("i").dtype(), DType::Int64);
14049        assert_eq!(frame.column("f").expect("f").dtype(), DType::Float64);
14050        assert_eq!(frame.column("i").expect("i").values()[0], Scalar::Int64(0));
14051        assert_eq!(
14052            frame.column("i").expect("i").values()[31],
14053            Scalar::Int64(31)
14054        );
14055        assert_eq!(
14056            frame.column("f").expect("f").values()[31],
14057            Scalar::Float64(7.75)
14058        );
14059    }
14060
14061    #[test]
14062    fn read_csv_typed_numeric_fast_path_rejects_non_all_valid_numeric_semantics() {
14063        let headers = vec!["x".to_owned()];
14064        for input in [
14065            "x\nNaN\n1.0\n",
14066            "x\nNAN\n1.0\n",
14067            "x\n+NaN\n1.0\n",
14068            "x\n NaN \n1.0\n",
14069            "x\n\"NAN\"\n1.0\n",
14070            "x\ntrue\nfalse\n",
14071            "x\n \n1\n",
14072        ] {
14073            assert!(
14074                super::try_read_csv_str_typed_numeric(input, &headers)
14075                    .expect("probe")
14076                    .is_none(),
14077                "fast path must reject {input:?}"
14078            );
14079        }
14080
14081        let nan_frame = read_csv_str("x\nNaN\n1.0\n").expect("fallback nan");
14082        assert_eq!(
14083            nan_frame.column("x").expect("x").values(),
14084            &[Scalar::Null(NullKind::NaN), Scalar::Float64(1.0)]
14085        );
14086
14087        let bool_frame = read_csv_str("x\ntrue\nfalse\n").expect("fallback bool");
14088        assert_eq!(
14089            bool_frame.column("x").expect("x").values(),
14090            &[Scalar::Bool(true), Scalar::Bool(false)]
14091        );
14092
14093        let padded_nan_frame = read_csv_str("x\n NaN \n1.0\n").expect("fallback padded nan");
14094        let padded_nan_column = padded_nan_frame.column("x").expect("x");
14095        assert!(padded_nan_column.has_nulls());
14096        assert!(!padded_nan_column.validity().get(0));
14097        assert!(padded_nan_column.values()[0].is_missing());
14098    }
14099
14100    #[test]
14101    fn read_csv_typed_numeric_fast_path_keeps_all_valid_float_edges() {
14102        let input = "x\n-0.0\ninf\n-inf\n1\n";
14103        let headers = vec!["x".to_owned()];
14104        let frame = super::try_read_csv_str_typed_numeric(input, &headers)
14105            .expect("typed parse")
14106            .expect("numeric fast path");
14107        let column = frame.column("x").expect("x");
14108
14109        assert_eq!(column.dtype(), DType::Float64);
14110        assert!(!column.has_nulls());
14111        assert!(column.validity().all());
14112        let values = column.values();
14113        assert_eq!(
14114            values,
14115            &[
14116                Scalar::Float64(-0.0),
14117                Scalar::Float64(f64::INFINITY),
14118                Scalar::Float64(f64::NEG_INFINITY),
14119                Scalar::Float64(1.0)
14120            ]
14121        );
14122        let negative_zero_bits = match values[0] {
14123            Scalar::Float64(value) => Some(value.to_bits()),
14124            _ => None,
14125        };
14126        assert_eq!(negative_zero_bits, Some((-0.0f64).to_bits()));
14127    }
14128
14129    #[test]
14130    fn read_csv_typed_numeric_fast_path_preserves_ragged_row_errors() {
14131        let long_row = "a,b\n1,2,3\n";
14132        let err = read_csv_str(long_row).expect_err("long row must reject");
14133        assert!(matches!(err, IoError::Csv(_)), "got {err:?}");
14134    }
14135
14136    #[test]
14137    fn to_csv_datetime_is_column_uniform_like_pandas() {
14138        // pandas to_csv renders a datetime column with ONE format: date-only
14139        // when all values are midnight, else YYYY-MM-DD HH:MM:SS with a
14140        // sub-second suffix sized to the column's finest resolution. Verified
14141        // vs live pandas 2.2.3. (Previously format_datetime_ns dropped the
14142        // sub-second component and always wrote 00:00:00.)
14143        fn dt_frame(nanos: &[i64]) -> DataFrame {
14144            let values: Vec<Scalar> = nanos.iter().map(|&n| Scalar::Datetime64(n)).collect();
14145            let col = Column::new(DType::Datetime64, values).expect("col");
14146            let mut cols = BTreeMap::new();
14147            cols.insert("d".to_string(), col);
14148            let index = Index::from_i64((0..nanos.len() as i64).collect());
14149            DataFrame::new_with_column_order(index, cols, vec!["d".to_string()]).expect("frame")
14150        }
14151        const MIDNIGHT_JAN1: i64 = 1_577_836_800_000_000_000; // 2020-01-01 00:00:00Z
14152        const MIDNIGHT_JAN2: i64 = 1_577_923_200_000_000_000; // 2020-01-02 00:00:00Z
14153        const JAN2_0300: i64 = 1_577_934_000_000_000_000; // 2020-01-02 03:00:00Z
14154        const JAN1_HALF: i64 = 1_577_836_800_500_000_000; // 2020-01-01 00:00:00.5Z
14155
14156        // All midnight -> date only.
14157        assert_eq!(
14158            write_csv_string(&dt_frame(&[MIDNIGHT_JAN1, MIDNIGHT_JAN2])).expect("w"),
14159            "d\n2020-01-01\n2020-01-02\n"
14160        );
14161        // Sub-second present -> whole column gets .fff (millis), incl. .000.
14162        assert_eq!(
14163            write_csv_string(&dt_frame(&[JAN1_HALF, MIDNIGHT_JAN1])).expect("w"),
14164            "d\n2020-01-01 00:00:00.500\n2020-01-01 00:00:00.000\n"
14165        );
14166        // Time present, no sub-second -> HH:MM:SS for all (midnight -> 00:00:00).
14167        assert_eq!(
14168            write_csv_string(&dt_frame(&[JAN2_0300, MIDNIGHT_JAN1])).expect("w"),
14169            "d\n2020-01-02 03:00:00\n2020-01-01 00:00:00\n"
14170        );
14171        // NaT in an otherwise date-only column -> date only, NaT -> quoted "".
14172        assert_eq!(
14173            write_csv_string(&dt_frame(&[MIDNIGHT_JAN1, i64::MIN])).expect("w"),
14174            "d\n2020-01-01\n\"\"\n"
14175        );
14176    }
14177
14178    #[test]
14179    fn to_csv_datetime_index_is_column_uniform_like_pandas() {
14180        use super::{CsvWriteOptions, write_csv_string_with_options};
14181        // A DatetimeIndex written with index=True follows the same column-
14182        // uniform rule as a datetime column. Verified vs live pandas 2.2.3.
14183        fn dt_index_frame(nanos: &[i64]) -> DataFrame {
14184            let labels: Vec<IndexLabel> =
14185                nanos.iter().map(|&n| IndexLabel::Datetime64(n)).collect();
14186            let index = Index::new(labels);
14187            let values: Vec<Scalar> = (0..nanos.len() as i64).map(Scalar::Int64).collect();
14188            let col = Column::new(DType::Int64, values).expect("col");
14189            let mut cols = BTreeMap::new();
14190            cols.insert("v".to_string(), col);
14191            DataFrame::new_with_column_order(index, cols, vec!["v".to_string()]).expect("frame")
14192        }
14193        let opts = CsvWriteOptions {
14194            include_index: true,
14195            ..Default::default()
14196        };
14197        // All midnight -> date only.
14198        assert_eq!(
14199            write_csv_string_with_options(
14200                &dt_index_frame(&[1_577_836_800_000_000_000, 1_577_923_200_000_000_000]),
14201                &opts
14202            )
14203            .expect("w"),
14204            ",v\n2020-01-01,0\n2020-01-02,1\n"
14205        );
14206        // Sub-second (ms) -> .fff on every label.
14207        assert_eq!(
14208            write_csv_string_with_options(
14209                &dt_index_frame(&[1_577_836_800_500_000_000, 1_577_836_800_250_000_000]),
14210                &opts
14211            )
14212            .expect("w"),
14213            ",v\n2020-01-01 00:00:00.500,0\n2020-01-01 00:00:00.250,1\n"
14214        );
14215    }
14216
14217    #[test]
14218    fn read_csv_with_options_object_fallback_preserves_text() {
14219        use super::{CsvReadOptions, read_csv_with_options};
14220        // Same pandas object-fallback rule on the options reader (custom
14221        // delimiter / na_values etc.): non-canonical bool casing and zero-padded
14222        // numbers survive once the column is object. Verified vs pandas 2.2.3.
14223        let tsv = CsvReadOptions {
14224            delimiter: b'\t',
14225            ..Default::default()
14226        };
14227        let frame = read_csv_with_options("c\ntrue\nfalse\nmaybe\n", &tsv).expect("read");
14228        assert_eq!(frame.index().int64_unit_range_labels(), Some((0, 3)));
14229        let out = write_csv_string(&frame).expect("write");
14230        assert_eq!(out, "c\ntrue\nfalse\nmaybe\n");
14231
14232        let frame2 = read_csv_with_options("c\n01\n02\nabc\n", &tsv).expect("read");
14233        let out2 = write_csv_string(&frame2).expect("write");
14234        assert_eq!(out2, "c\n01\n02\nabc\n");
14235
14236        // Pure-bool column still infers bool dtype (writes True/False).
14237        let frame3 = read_csv_with_options("c\ntrue\nfalse\n", &tsv).expect("read");
14238        let out3 = write_csv_string(&frame3).expect("write");
14239        assert_eq!(out3, "c\nTrue\nFalse\n");
14240
14241        // Custom na_values: an NA cell in an object column stays missing while
14242        // the surrounding original literals are preserved.
14243        let na_opts = CsvReadOptions {
14244            delimiter: b'\t',
14245            na_values: vec!["MISSING".to_string()],
14246            ..Default::default()
14247        };
14248        let frame4 = read_csv_with_options("c\ntrue\nMISSING\nmaybe\n", &na_opts).expect("read");
14249        let out4 = write_csv_string(&frame4).expect("write");
14250        // The lone empty NaN field in a single-column object frame is quoted "".
14251        assert_eq!(out4, "c\ntrue\n\"\"\nmaybe\n");
14252    }
14253
14254    #[test]
14255    fn to_csv_float_format_matches_pandas_str() {
14256        // pandas to_csv writes floats via Python str(float): whole numbers keep
14257        // ".0", decimals use the shortest round-trip, and extreme magnitudes use
14258        // signed two-digit scientific notation. Verified vs live pandas 2.2.3.
14259        let cases: &[(f64, &str)] = &[
14260            (1.0, "1.0"),
14261            (3.0, "3.0"),
14262            (100.0, "100.0"),
14263            (-7.0, "-7.0"),
14264            (2.5, "2.5"),
14265            (0.5, "0.5"),
14266            (0.1, "0.1"),
14267            (1.0 / 3.0, "0.3333333333333333"),
14268            (1234567890123456.0, "1234567890123456.0"),
14269            (1e16, "1e+16"),
14270            (1e20, "1e+20"),
14271            (1e-5, "1e-05"),
14272            (0.0001, "0.0001"),
14273            (1e-7, "1e-07"),
14274            (f64::INFINITY, "inf"),
14275            (f64::NEG_INFINITY, "-inf"),
14276        ];
14277        for (v, expected) in cases {
14278            assert_eq!(
14279                &format_pandas_float(*v),
14280                expected,
14281                "format_pandas_float({v})"
14282            );
14283        }
14284    }
14285
14286    #[test]
14287    fn to_csv_single_column_nan_quotes_empty_and_keeps_float_repr() {
14288        // The lone empty NaN field is already quoted as "" by the csv writer so
14289        // read_csv doesn't drop it (NOT a blank line). The real fix here is the
14290        // float repr: 1.0/3.0 must stay "1.0"/"3.0", not collapse to "1"/"3".
14291        let frame = read_csv_str("x\n1.0\nNaN\n3.0\n").expect("read");
14292        assert!(frame.column("x").unwrap().values()[1].is_missing());
14293        let out = write_csv_string(&frame).expect("write");
14294        assert_eq!(out, "x\n1.0\n\"\"\n3.0\n");
14295    }
14296
14297    #[test]
14298    fn test_csv_capacity_hint_reasonable() {
14299        // Generate a ~1MB CSV and verify it parses correctly.
14300        // The capacity hint (input.len / (cols*8)) should avoid excessive reallocs.
14301        let mut csv = String::with_capacity(1_100_000);
14302        csv.push_str("a,b,c,d,e\n");
14303        let target_rows = 50_000; // ~20 bytes/row * 50k ≈ 1MB
14304        for i in 0..target_rows {
14305            csv.push_str(&format!("{},{},{},{},{}\n", i, i * 2, i * 3, i * 4, i * 5));
14306        }
14307        assert!(csv.len() > 500_000, "CSV should be large");
14308
14309        let frame = read_csv_str(&csv).expect("parse large CSV");
14310        assert_eq!(frame.index().len(), target_rows);
14311        assert_eq!(frame.columns().len(), 5);
14312        // Spot-check last row
14313        assert_eq!(
14314            frame.column("a").unwrap().values()[target_rows - 1],
14315            Scalar::Int64((target_rows - 1) as i64)
14316        );
14317        eprintln!(
14318            "[TEST] test_csv_capacity_hint_reasonable | rows={target_rows} cols=5 parse_ok=true | PASS"
14319        );
14320    }
14321
14322    #[test]
14323    fn test_csv_empty_columns() {
14324        // CSV with headers but no data rows -> empty DataFrame with correct column names.
14325        let input = "x,y,z\n";
14326        let frame = read_csv_str(input).expect("parse");
14327        assert_eq!(frame.index().len(), 0);
14328        let keys: Vec<&String> = frame.columns().keys().collect();
14329        assert_eq!(keys, &["x", "y", "z"]);
14330        for col in frame.columns().values() {
14331            assert!(col.is_empty());
14332        }
14333        eprintln!("[TEST] test_csv_empty_columns | rows=0 cols=3 parse_ok=true | PASS");
14334    }
14335
14336    #[test]
14337    fn test_csv_comment_skips_lines() {
14338        let input = "# header comment\nname,age\n# inline comment\nalice,30\nbob,25\n";
14339        let options = CsvReadOptions {
14340            comment: Some(b'#'),
14341            ..CsvReadOptions::default()
14342        };
14343        let frame = read_csv_with_options(input, &options).expect("parse");
14344        assert_eq!(frame.index().len(), 2);
14345        let names: Vec<&String> = frame.column_names().into_iter().collect();
14346        assert_eq!(names, vec!["name", "age"]);
14347        assert_eq!(
14348            frame.column("name").unwrap().values()[0],
14349            Scalar::Utf8("alice".to_string())
14350        );
14351        assert_eq!(frame.column("age").unwrap().values()[1], Scalar::Int64(25));
14352    }
14353
14354    #[test]
14355    fn test_csv_comment_none_preserves_comment_lines() {
14356        // Without comment set, a leading "#"-line should become part of parsing
14357        // (and fail as duplicate/missing-headers or be treated as data).
14358        let input = "name,age\nalice,30\n";
14359        let frame = read_csv_with_options(input, &CsvReadOptions::default()).expect("parse");
14360        assert_eq!(frame.index().len(), 1);
14361    }
14362
14363    #[test]
14364    fn read_csv_with_default_options_matches_read_csv_str() {
14365        for input in [
14366            "i,f,s\n1,2.5,abc\n3,4.0,def\n",
14367            "flag\ntrue\nfalse\nmaybe\n",
14368        ] {
14369            let expected = read_csv_str(input).expect("default read");
14370            let actual =
14371                read_csv_with_options(input, &CsvReadOptions::default()).expect("options read");
14372
14373            assert_eq!(actual.index().len(), expected.index().len());
14374            assert_eq!(actual.column_names(), expected.column_names());
14375            for name in expected.column_names() {
14376                let expected_col = expected.column(name).expect("expected column");
14377                let actual_col = actual.column(name).expect("actual column");
14378                assert_eq!(actual_col.dtype(), expected_col.dtype());
14379                assert_eq!(actual_col.values(), expected_col.values());
14380            }
14381        }
14382    }
14383
14384    #[test]
14385    fn read_csv_str_cache_reuses_exact_successful_input() {
14386        let input = "x,y\n1,2.5\n3,4.5\n";
14387
14388        let first = read_csv_str(input).expect("first parse");
14389        let second = read_csv_str(input).expect("cached parse");
14390
14391        assert_eq!(second.index().len(), first.index().len());
14392        assert_eq!(second.column_names(), first.column_names());
14393        for name in first.column_names() {
14394            let first_col = first.column(name).expect("first column");
14395            let second_col = second.column(name).expect("second column");
14396            assert_eq!(second_col.dtype(), first_col.dtype());
14397            assert_eq!(second_col.values(), first_col.values());
14398        }
14399    }
14400
14401    #[test]
14402    fn read_csv_str_cache_is_content_addressed() {
14403        let mut input = String::from("x\n1\n2\n");
14404        let first = read_csv_str(&input).expect("first parse");
14405        assert_eq!(first.column("x").unwrap().values()[0], Scalar::Int64(1));
14406
14407        input.clear();
14408        input.push_str("x\n9\n10\n");
14409        let second = read_csv_str(&input).expect("changed-content parse");
14410
14411        assert_eq!(second.index().len(), 2);
14412        assert_eq!(second.column("x").unwrap().values()[0], Scalar::Int64(9));
14413        assert_eq!(second.column("x").unwrap().values()[1], Scalar::Int64(10));
14414    }
14415
14416    #[test]
14417    fn read_csv_no_na_cache_reuses_exact_successful_input() {
14418        let options = CsvReadOptions {
14419            na_filter: false,
14420            ..CsvReadOptions::default()
14421        };
14422        let input = "x,y\n1,2.5\n3,4.5\n";
14423
14424        let first = read_csv_with_options(input, &options).expect("first no-na parse");
14425        let second = read_csv_with_options(input, &options).expect("cached no-na parse");
14426
14427        assert_eq!(second.index().len(), first.index().len());
14428        assert_eq!(second.column_names(), first.column_names());
14429        for name in first.column_names() {
14430            let first_col = first.column(name).expect("first column");
14431            let second_col = second.column(name).expect("second column");
14432            assert_eq!(second_col.dtype(), first_col.dtype());
14433            assert_eq!(second_col.values(), first_col.values());
14434        }
14435    }
14436
14437    #[test]
14438    fn csv_parse_cache_keeps_default_and_no_na_modes_separate() {
14439        let input = "mode_sep_a,mode_sep_b\n11,12.5\n13,14.5\n";
14440        let no_na_options = CsvReadOptions {
14441            na_filter: false,
14442            ..CsvReadOptions::default()
14443        };
14444
14445        let no_na_frame = read_csv_with_options(input, &no_na_options).expect("no-na parse");
14446        assert!(super::csv_parse_cache_lookup(super::CsvParseCacheMode::Default, input).is_none());
14447        assert!(
14448            super::csv_parse_cache_lookup(super::CsvParseCacheMode::NoNaNumeric, input).is_some()
14449        );
14450
14451        let default_frame = read_csv_str(input).expect("default parse");
14452        let default_cached =
14453            super::csv_parse_cache_lookup(super::CsvParseCacheMode::Default, input)
14454                .expect("default cache entry");
14455        let no_na_cached =
14456            super::csv_parse_cache_lookup(super::CsvParseCacheMode::NoNaNumeric, input)
14457                .expect("no-na cache entry");
14458
14459        assert_eq!(default_cached.column_names(), default_frame.column_names());
14460        assert_eq!(no_na_cached.column_names(), no_na_frame.column_names());
14461        assert_eq!(
14462            default_cached.column("mode_sep_b").unwrap().values(),
14463            default_frame.column("mode_sep_b").unwrap().values()
14464        );
14465        assert_eq!(
14466            no_na_cached.column("mode_sep_b").unwrap().values(),
14467            no_na_frame.column("mode_sep_b").unwrap().values()
14468        );
14469    }
14470
14471    #[test]
14472    fn csv_default_options_fast_path_excludes_behavioral_options() {
14473        assert!(super::csv_read_options_match_default_fast_path(
14474            &CsvReadOptions::default()
14475        ));
14476
14477        for options in [
14478            CsvReadOptions {
14479                delimiter: b'\t',
14480                ..CsvReadOptions::default()
14481            },
14482            CsvReadOptions {
14483                na_filter: false,
14484                ..CsvReadOptions::default()
14485            },
14486            CsvReadOptions {
14487                nrows: Some(1),
14488                ..CsvReadOptions::default()
14489            },
14490            CsvReadOptions {
14491                comment: Some(b'#'),
14492                ..CsvReadOptions::default()
14493            },
14494            CsvReadOptions {
14495                thousands: Some(b','),
14496                ..CsvReadOptions::default()
14497            },
14498        ] {
14499            assert!(!super::csv_read_options_match_default_fast_path(&options));
14500        }
14501    }
14502
14503    #[test]
14504    fn read_csv_no_na_filter_numeric_fast_path_preserves_numeric_columns() {
14505        let options = CsvReadOptions {
14506            na_filter: false,
14507            ..CsvReadOptions::default()
14508        };
14509        assert!(super::csv_read_options_match_no_na_numeric_fast_path(
14510            &options
14511        ));
14512
14513        let frame = read_csv_with_options("i,f\n1,2.5\n3,4\n", &options).expect("parse");
14514        assert_eq!(frame.len(), 2);
14515        assert_eq!(frame.column("i").expect("i").dtype(), DType::Int64);
14516        assert_eq!(frame.column("f").expect("f").dtype(), DType::Float64);
14517        assert_eq!(frame.column("i").expect("i").values()[1], Scalar::Int64(3));
14518        assert_eq!(
14519            frame.column("f").expect("f").values()[0],
14520            Scalar::Float64(2.5)
14521        );
14522    }
14523
14524    #[test]
14525    fn read_csv_no_na_filter_fast_path_falls_back_for_empty_field() {
14526        let options = CsvReadOptions {
14527            na_filter: false,
14528            ..CsvReadOptions::default()
14529        };
14530        let frame = read_csv_with_options("x,y\n1,\n2,3\n", &options).expect("parse");
14531        let y = frame.column("y").expect("y");
14532
14533        assert_eq!(y.dtype(), DType::Utf8);
14534        assert_eq!(y.values()[0], Scalar::Utf8(String::new()));
14535        assert_eq!(y.values()[1], Scalar::Utf8("3".to_owned()));
14536    }
14537
14538    #[test]
14539    fn test_csv_comment_custom_char() {
14540        let input = "% this is ignored\nname,age\nalice,30\n";
14541        let options = CsvReadOptions {
14542            comment: Some(b'%'),
14543            ..CsvReadOptions::default()
14544        };
14545        let frame = read_csv_with_options(input, &options).expect("parse");
14546        assert_eq!(frame.index().len(), 1);
14547        assert_eq!(
14548            frame.column("name").unwrap().values()[0],
14549            Scalar::Utf8("alice".to_string())
14550        );
14551    }
14552
14553    #[test]
14554    fn test_csv_thousands_strips_int_separator() {
14555        let input = "amount\n\"1,234,567\"\n\"42\"\n";
14556        let options = CsvReadOptions {
14557            thousands: Some(b','),
14558            ..CsvReadOptions::default()
14559        };
14560        let frame = read_csv_with_options(input, &options).expect("parse");
14561        assert_eq!(
14562            frame.column("amount").unwrap().values()[0],
14563            Scalar::Int64(1234567)
14564        );
14565        assert_eq!(
14566            frame.column("amount").unwrap().values()[1],
14567            Scalar::Int64(42)
14568        );
14569    }
14570
14571    #[test]
14572    fn test_csv_thousands_strips_float_with_custom_decimal() {
14573        // European convention: '.' as thousands, ',' as decimal.
14574        let input = "price\n\"1.234,56\"\n";
14575        let options = CsvReadOptions {
14576            thousands: Some(b'.'),
14577            decimal: b',',
14578            ..CsvReadOptions::default()
14579        };
14580        let frame = read_csv_with_options(input, &options).expect("parse");
14581        let v = frame.column("price").unwrap().values()[0].clone();
14582        assert!(matches!(v, Scalar::Float64(_)), "expected Float64");
14583        let Scalar::Float64(f) = v else { return };
14584        assert!((f - 1234.56).abs() < 1e-9);
14585    }
14586
14587    #[test]
14588    fn test_csv_thousands_none_keeps_separator_as_string() {
14589        // Without thousands set, "1,234" in a single field stays Utf8.
14590        let input = "amount\n\"1,234\"\n";
14591        let frame = read_csv_with_options(input, &CsvReadOptions::default()).expect("parse");
14592        assert_eq!(
14593            frame.column("amount").unwrap().values()[0],
14594            Scalar::Utf8("1,234".to_string())
14595        );
14596    }
14597
14598    #[test]
14599    fn test_csv_thousands_equal_to_decimal_is_ignored() {
14600        // pandas silently ignores thousands if it equals decimal.
14601        let input = "v\n\"1.234\"\n";
14602        let options = CsvReadOptions {
14603            thousands: Some(b'.'),
14604            decimal: b'.',
14605            ..CsvReadOptions::default()
14606        };
14607        let frame = read_csv_with_options(input, &options).expect("parse");
14608        let v = frame.column("v").unwrap().values()[0].clone();
14609        // thousands ignored → "1.234" parses as float 1.234
14610        assert!(matches!(v, Scalar::Float64(_)), "expected Float64");
14611        let Scalar::Float64(f) = v else { return };
14612        assert!((f - 1.234).abs() < 1e-9);
14613    }
14614
14615    #[test]
14616    fn test_csv_thousands_does_not_affect_non_numeric() {
14617        let input = "name\n\"a,b\"\n";
14618        let options = CsvReadOptions {
14619            thousands: Some(b','),
14620            ..CsvReadOptions::default()
14621        };
14622        let frame = read_csv_with_options(input, &options).expect("parse");
14623        assert_eq!(
14624            frame.column("name").unwrap().values()[0],
14625            Scalar::Utf8("a,b".to_string())
14626        );
14627    }
14628
14629    #[test]
14630    fn test_csv_quotechar_custom_single_quote() {
14631        let input = "name,remark\n'alice','loves, cats'\n";
14632        let options = CsvReadOptions {
14633            quotechar: b'\'',
14634            ..CsvReadOptions::default()
14635        };
14636        let frame = read_csv_with_options(input, &options).expect("parse");
14637        assert_eq!(
14638            frame.column("name").unwrap().values()[0],
14639            Scalar::Utf8("alice".to_string())
14640        );
14641        assert_eq!(
14642            frame.column("remark").unwrap().values()[0],
14643            Scalar::Utf8("loves, cats".to_string())
14644        );
14645    }
14646
14647    #[test]
14648    fn test_csv_doublequote_true_collapses_doubled_quotes() {
14649        // The field is `she said ""hi""` with doubled inner quotes.
14650        let input = "text\n\"she said \"\"hi\"\"\"\n";
14651        let frame = read_csv_with_options(input, &CsvReadOptions::default()).expect("parse");
14652        assert_eq!(
14653            frame.column("text").unwrap().values()[0],
14654            Scalar::Utf8("she said \"hi\"".to_string())
14655        );
14656    }
14657
14658    #[test]
14659    fn test_csv_doublequote_false_requires_escapechar() {
14660        // With doublequote=false and escapechar=\, \" escapes the quote.
14661        let input = "text\n\"hi\\\"there\"\n";
14662        let options = CsvReadOptions {
14663            doublequote: false,
14664            escapechar: Some(b'\\'),
14665            ..CsvReadOptions::default()
14666        };
14667        let frame = read_csv_with_options(input, &options).expect("parse");
14668        assert_eq!(
14669            frame.column("text").unwrap().values()[0],
14670            Scalar::Utf8("hi\"there".to_string())
14671        );
14672    }
14673
14674    #[test]
14675    fn test_csv_lineterminator_semicolon() {
14676        // Single-byte record separator '|'. No newlines in the data.
14677        let input = "a,b|1,x|2,y|3,z";
14678        let options = CsvReadOptions {
14679            lineterminator: Some(b'|'),
14680            ..CsvReadOptions::default()
14681        };
14682        let frame = read_csv_with_options(input, &options).expect("parse");
14683        assert_eq!(frame.index().len(), 3);
14684        assert_eq!(frame.column_names(), vec!["a", "b"]);
14685        assert_eq!(frame.column("a").unwrap().values()[2], Scalar::Int64(3));
14686    }
14687
14688    #[test]
14689    fn test_csv_lineterminator_default_none_accepts_crlf() {
14690        let input = "a,b\r\n1,x\r\n2,y\r\n";
14691        let frame = read_csv_with_options(input, &CsvReadOptions::default()).expect("parse");
14692        assert_eq!(frame.index().len(), 2);
14693    }
14694
14695    #[test]
14696    fn test_csv_lineterminator_interacts_with_skipfooter() {
14697        let input = "a|1|2|3|4|FOOTER";
14698        let options = CsvReadOptions {
14699            lineterminator: Some(b'|'),
14700            skipfooter: 1,
14701            ..CsvReadOptions::default()
14702        };
14703        let frame = read_csv_with_options(input, &options).expect("parse");
14704        // 5 data rows after header, footer drops 1 → 4 rows.
14705        assert_eq!(frame.index().len(), 4);
14706    }
14707
14708    #[test]
14709    fn test_csv_skipfooter_drops_trailing_rows() {
14710        let input = "a,b\n1,x\n2,y\n3,z\nTOTAL,summary\n";
14711        let options = CsvReadOptions {
14712            skipfooter: 1,
14713            ..CsvReadOptions::default()
14714        };
14715        let frame = read_csv_with_options(input, &options).expect("parse");
14716        assert_eq!(frame.index().len(), 3);
14717        assert_eq!(frame.column("a").unwrap().values()[2], Scalar::Int64(3));
14718    }
14719
14720    #[test]
14721    fn test_csv_skipfooter_zero_is_noop() {
14722        let input = "a,b\n1,x\n2,y\n";
14723        let frame_default =
14724            read_csv_with_options(input, &CsvReadOptions::default()).expect("parse");
14725        let options = CsvReadOptions {
14726            skipfooter: 0,
14727            ..CsvReadOptions::default()
14728        };
14729        let frame_zero = read_csv_with_options(input, &options).expect("parse");
14730        assert_eq!(frame_default.index().len(), frame_zero.index().len());
14731    }
14732
14733    #[test]
14734    fn test_csv_skipfooter_larger_than_data_clears_rows() {
14735        let input = "a,b\n1,x\n2,y\n";
14736        let options = CsvReadOptions {
14737            skipfooter: 10,
14738            ..CsvReadOptions::default()
14739        };
14740        let frame = read_csv_with_options(input, &options).expect("parse");
14741        assert_eq!(frame.index().len(), 0);
14742        // Columns and headers are still preserved.
14743        assert_eq!(frame.column_names().len(), 2);
14744    }
14745
14746    #[test]
14747    fn test_csv_skipfooter_with_nrows() {
14748        // nrows caps read to 4, then skipfooter drops last 1 → 3 rows.
14749        let input = "a\n1\n2\n3\n4\n5\n";
14750        let options = CsvReadOptions {
14751            nrows: Some(4),
14752            skipfooter: 1,
14753            ..CsvReadOptions::default()
14754        };
14755        let frame = read_csv_with_options(input, &options).expect("parse");
14756        assert_eq!(frame.index().len(), 3);
14757    }
14758
14759    #[test]
14760    fn test_csv_escapechar_none_default_keeps_backslash_literal() {
14761        // Without escapechar set, backslash is just a normal character.
14762        let input = "text\n\"foo\\bar\"\n";
14763        let frame = read_csv_with_options(input, &CsvReadOptions::default()).expect("parse");
14764        assert_eq!(
14765            frame.column("text").unwrap().values()[0],
14766            Scalar::Utf8("foo\\bar".to_string())
14767        );
14768    }
14769
14770    #[test]
14771    fn test_csv_single_column() {
14772        // CSV with one column, many rows -> correct parsing.
14773        let mut csv = String::from("value\n");
14774        for i in 0..500 {
14775            csv.push_str(&format!("{}\n", i));
14776        }
14777        let frame = read_csv_str(&csv).expect("parse");
14778        assert_eq!(frame.index().len(), 500);
14779        assert_eq!(frame.columns().len(), 1);
14780        assert_eq!(
14781            frame.column("value").unwrap().values()[499],
14782            Scalar::Int64(499)
14783        );
14784        eprintln!("[TEST] test_csv_single_column | rows=500 cols=1 parse_ok=true | PASS");
14785    }
14786
14787    #[test]
14788    fn test_csv_many_columns() {
14789        // CSV with 100+ columns -> all columns present, correct values.
14790        let col_count = 120;
14791        let headers: Vec<String> = (0..col_count).map(|i| format!("c{i:03}")).collect();
14792        let mut csv = headers.join(",");
14793        csv.push('\n');
14794        // 3 data rows
14795        for row in 0..3 {
14796            let vals: Vec<String> = (0..col_count)
14797                .map(|c| format!("{}", row * 1000 + c))
14798                .collect();
14799            csv.push_str(&vals.join(","));
14800            csv.push('\n');
14801        }
14802        let frame = read_csv_str(&csv).expect("parse");
14803        assert_eq!(frame.columns().len(), col_count);
14804        assert_eq!(frame.index().len(), 3);
14805        // Spot-check: c000 row 0 = 0, c119 row 2 = 2119
14806        assert_eq!(frame.column("c000").unwrap().values()[0], Scalar::Int64(0));
14807        assert_eq!(
14808            frame.column("c119").unwrap().values()[2],
14809            Scalar::Int64(2119)
14810        );
14811        eprintln!("[TEST] test_csv_many_columns | rows=3 cols={col_count} parse_ok=true | PASS");
14812    }
14813
14814    #[test]
14815    fn test_csv_mixed_dtypes() {
14816        // Columns with uniform int/float/string/bool/null -> correct type inference.
14817        let input = "ints,floats,strings,bools,nulls\n\
14818                     1,1.5,hello,true,\n\
14819                     2,2.7,world,false,\n\
14820                     3,3.14,foo,true,\n";
14821        let frame = read_csv_str(input).expect("parse");
14822
14823        let ints = frame.column("ints").unwrap();
14824        assert_eq!(ints.values()[0], Scalar::Int64(1));
14825
14826        let floats = frame.column("floats").unwrap();
14827        assert_eq!(floats.values()[1], Scalar::Float64(2.7));
14828
14829        let strings = frame.column("strings").unwrap();
14830        assert_eq!(strings.values()[2], Scalar::Utf8("foo".to_owned()));
14831
14832        let bools = frame.column("bools").unwrap();
14833        assert_eq!(bools.values()[0], Scalar::Bool(true));
14834        assert_eq!(bools.values()[1], Scalar::Bool(false));
14835
14836        // "nulls" column is all empty -> all null/NaN
14837        let nulls = frame.column("nulls").unwrap();
14838        for v in nulls.values() {
14839            assert!(v.is_missing(), "null column values should be missing");
14840        }
14841        eprintln!(
14842            "[TEST] test_csv_mixed_dtypes | rows=3 cols=5 parse_ok=true | dtype_per_col=[int64,float64,utf8,bool,null] | PASS"
14843        );
14844    }
14845
14846    #[test]
14847    fn test_csv_unicode_headers() {
14848        // CSV with unicode header names -> correct column names.
14849        let input = "名前,Größe,café\nAlice,170,latte\nBob,180,espresso\n";
14850        let frame = read_csv_str(input).expect("parse");
14851        assert!(frame.column("名前").is_some());
14852        assert!(frame.column("Größe").is_some());
14853        assert!(frame.column("café").is_some());
14854        assert_eq!(
14855            frame.column("名前").unwrap().values()[0],
14856            Scalar::Utf8("Alice".to_owned())
14857        );
14858        eprintln!("[TEST] test_csv_unicode_headers | rows=2 cols=3 parse_ok=true | PASS");
14859    }
14860
14861    #[test]
14862    fn test_csv_quoted_fields() {
14863        // CSV with quoted fields containing commas and newlines -> correct parsing.
14864        let input =
14865            "name,address\n\"Smith, John\",\"123 Main St\nApt 4\"\nJane,\"456 Oak, Suite 1\"\n";
14866        let frame = read_csv_str(input).expect("parse");
14867        assert_eq!(frame.index().len(), 2);
14868        assert_eq!(
14869            frame.column("name").unwrap().values()[0],
14870            Scalar::Utf8("Smith, John".to_owned())
14871        );
14872        // Quoted field with embedded newline
14873        let addr0 = &frame.column("address").unwrap().values()[0];
14874        assert!(
14875            matches!(addr0, Scalar::Utf8(s) if s.contains('\n')),
14876            "expected Utf8 containing embedded newline, got {addr0:?}"
14877        );
14878        eprintln!("[TEST] test_csv_quoted_fields | rows=2 cols=2 parse_ok=true | PASS");
14879    }
14880
14881    #[test]
14882    fn test_csv_trailing_newline() {
14883        // CSV with/without trailing newline -> identical DataFrame.
14884        let with = "a,b\n1,2\n3,4\n";
14885        let without = "a,b\n1,2\n3,4";
14886        let f1 = read_csv_str(with).expect("with newline");
14887        let f2 = read_csv_str(without).expect("without newline");
14888
14889        assert_eq!(f1.index().len(), f2.index().len());
14890        assert_eq!(f1.columns().len(), f2.columns().len());
14891        for key in f1.columns().keys() {
14892            let c1 = f1.column(key).unwrap();
14893            let c2 = f2.column(key).unwrap();
14894            assert_eq!(c1.values(), c2.values(), "column {key} mismatch");
14895        }
14896        eprintln!("[TEST] test_csv_trailing_newline | rows=2 cols=2 parse_ok=true | PASS");
14897    }
14898
14899    #[test]
14900    fn test_csv_round_trip_unchanged() {
14901        // read_csv_str then write_csv_string produces semantically equivalent output.
14902        let input = "id,name,score\n1,Alice,95.5\n2,Bob,87\n3,,100\n";
14903        let frame = read_csv_str(input).expect("read");
14904        let output = write_csv_string(&frame).expect("write");
14905        // Re-parse the output and compare
14906        let frame2 = read_csv_str(&output).expect("re-read");
14907        assert_eq!(frame.index().len(), frame2.index().len());
14908        for key in frame.columns().keys() {
14909            let c1 = frame.column(key).unwrap();
14910            let c2 = frame2.column(key).unwrap();
14911            assert!(
14912                c1.semantic_eq(c2),
14913                "column {key} not semantically equal after round-trip"
14914            );
14915        }
14916        eprintln!("[TEST] test_csv_round_trip_unchanged | rows=3 cols=3 parse_ok=true | PASS");
14917    }
14918
14919    #[test]
14920    fn test_write_csv_options_custom_delimiter() {
14921        let input = "a,b\n1,x\n2,y\n";
14922        let frame = read_csv_str(input).expect("read");
14923        let output = write_csv_string_with_options(
14924            &frame,
14925            &CsvWriteOptions {
14926                delimiter: b';',
14927                ..CsvWriteOptions::default()
14928            },
14929        )
14930        .expect("write");
14931        assert!(output.starts_with("a;b\n"));
14932        assert!(output.contains("1;x\n"));
14933        assert!(output.contains("2;y\n"));
14934    }
14935
14936    #[test]
14937    fn test_write_csv_options_na_rep_replaces_nulls() {
14938        let input = "id,name\n1,Alice\n2,\n";
14939        let frame = read_csv_str(input).expect("read");
14940        let output = write_csv_string_with_options(
14941            &frame,
14942            &CsvWriteOptions {
14943                na_rep: "NA".to_string(),
14944                ..CsvWriteOptions::default()
14945            },
14946        )
14947        .expect("write");
14948        // Second data row's name should render as NA, not empty.
14949        assert!(output.contains("2,NA\n"));
14950        assert!(!output.contains("2,\n"));
14951    }
14952
14953    #[test]
14954    fn test_write_csv_options_header_false_omits_header_row() {
14955        let input = "a,b\n1,2\n";
14956        let frame = read_csv_str(input).expect("read");
14957        let output = write_csv_string_with_options(
14958            &frame,
14959            &CsvWriteOptions {
14960                header: false,
14961                ..CsvWriteOptions::default()
14962            },
14963        )
14964        .expect("write");
14965        assert_eq!(output, "1,2\n");
14966    }
14967
14968    #[test]
14969    fn test_write_csv_options_include_index_and_index_label() {
14970        let input = "a,b\n1,2\n3,4\n";
14971        let frame = read_csv_str(input).expect("read");
14972        let output = write_csv_string_with_options(
14973            &frame,
14974            &CsvWriteOptions {
14975                include_index: true,
14976                index_label: Some("row_id".to_string()),
14977                ..CsvWriteOptions::default()
14978            },
14979        )
14980        .expect("write");
14981
14982        assert_eq!(output, "row_id,a,b\n0,1,2\n1,3,4\n");
14983    }
14984
14985    #[test]
14986    fn test_write_csv_options_include_index_uses_named_index_when_label_omitted() {
14987        let mut cols = std::collections::BTreeMap::new();
14988        cols.insert(
14989            "a".to_string(),
14990            Column::from_values(vec![Scalar::Int64(10), Scalar::Int64(20)]).unwrap(),
14991        );
14992        let frame = DataFrame::new_with_column_order(
14993            Index::from_i64(vec![100, 200]).set_name("sample_id"),
14994            cols,
14995            vec!["a".to_string()],
14996        )
14997        .unwrap();
14998
14999        let output = write_csv_string_with_options(
15000            &frame,
15001            &CsvWriteOptions {
15002                include_index: true,
15003                ..CsvWriteOptions::default()
15004            },
15005        )
15006        .expect("write");
15007
15008        assert_eq!(output, "sample_id,a\n100,10\n200,20\n");
15009    }
15010
15011    #[test]
15012    fn test_write_csv_options_include_index_label_overrides_index_name() {
15013        let mut cols = std::collections::BTreeMap::new();
15014        cols.insert(
15015            "a".to_string(),
15016            Column::from_values(vec![Scalar::Int64(10), Scalar::Int64(20)]).unwrap(),
15017        );
15018        let frame = DataFrame::new_with_column_order(
15019            Index::from_i64(vec![100, 200]).set_name("sample_id"),
15020            cols,
15021            vec!["a".to_string()],
15022        )
15023        .unwrap();
15024
15025        let output = write_csv_string_with_options(
15026            &frame,
15027            &CsvWriteOptions {
15028                include_index: true,
15029                index_label: Some("row".to_string()),
15030                ..CsvWriteOptions::default()
15031            },
15032        )
15033        .expect("write");
15034
15035        assert_eq!(output, "row,a\n100,10\n200,20\n");
15036    }
15037
15038    #[test]
15039    fn test_csv_multiindex_roundtrip_with_explicit_index_cols() {
15040        let frame = make_row_multiindex_test_dataframe();
15041        let csv = write_csv_string_with_options(
15042            &frame,
15043            &CsvWriteOptions {
15044                include_index: true,
15045                ..CsvWriteOptions::default()
15046            },
15047        )
15048        .expect("write");
15049
15050        let roundtrip = read_csv_with_index_cols(
15051            &csv,
15052            &CsvReadOptions::default(),
15053            &["region", "product", "year"],
15054        )
15055        .expect("read");
15056
15057        assert!(roundtrip.equals(&frame));
15058        assert_eq!(roundtrip.row_multiindex(), frame.row_multiindex());
15059    }
15060
15061    #[test]
15062    fn test_write_csv_options_default_matches_write_csv_string() {
15063        let input = "a,b\n1,2\n3,4\n";
15064        let frame = read_csv_str(input).expect("read");
15065        let default_output = write_csv_string(&frame).expect("write");
15066        let options_output =
15067            write_csv_string_with_options(&frame, &CsvWriteOptions::default()).expect("write");
15068        assert_eq!(default_output, options_output);
15069    }
15070
15071    #[test]
15072    fn test_write_csv_options_na_rep_with_float_nan() {
15073        // Generate a frame with an explicit NaN float.
15074        use fp_columnar::Column;
15075        let mut cols = std::collections::BTreeMap::new();
15076        cols.insert(
15077            "score".to_string(),
15078            Column::from_values(vec![Scalar::Float64(1.5), Scalar::Float64(f64::NAN)]).unwrap(),
15079        );
15080        let frame = DataFrame::new_with_column_order(
15081            Index::from_i64(vec![0, 1]),
15082            cols,
15083            vec!["score".to_string()],
15084        )
15085        .unwrap();
15086        let output = write_csv_string_with_options(
15087            &frame,
15088            &CsvWriteOptions {
15089                na_rep: "NaN".to_string(),
15090                ..CsvWriteOptions::default()
15091            },
15092        )
15093        .expect("write");
15094        assert!(output.contains("NaN"));
15095    }
15096
15097    #[test]
15098    fn test_csv_large_file_perf() {
15099        // 100K-row, 10-column CSV -> parse completes, correct row/column counts.
15100        let col_count = 10;
15101        let row_count = 100_000;
15102        let headers: Vec<String> = (0..col_count).map(|i| format!("col{i}")).collect();
15103        let mut csv = String::with_capacity(row_count * 50);
15104        csv.push_str(&headers.join(","));
15105        csv.push('\n');
15106        for r in 0..row_count {
15107            for c in 0..col_count {
15108                if c > 0 {
15109                    csv.push(',');
15110                }
15111                csv.push_str(&(r * col_count + c).to_string());
15112            }
15113            csv.push('\n');
15114        }
15115
15116        let frame = read_csv_str(&csv).expect("parse 100K rows");
15117        assert_eq!(frame.index().len(), row_count);
15118        assert_eq!(frame.columns().len(), col_count);
15119        // Spot-check first and last rows
15120        assert_eq!(frame.column("col0").unwrap().values()[0], Scalar::Int64(0));
15121        assert_eq!(
15122            frame.column("col9").unwrap().values()[row_count - 1],
15123            Scalar::Int64(((row_count - 1) * col_count + 9) as i64)
15124        );
15125        eprintln!(
15126            "[TEST] test_csv_large_file_perf | rows={row_count} cols={col_count} parse_ok=true | PASS"
15127        );
15128    }
15129
15130    #[test]
15131    fn test_csv_golden_output() {
15132        // Fixed CSV input -> write_csv_string output matches golden reference exactly.
15133        let input = "a,b,c\n1,hello,3.14\n2,,true\n3,world,\n";
15134        let frame = read_csv_str(input).expect("parse");
15135        let output = write_csv_string(&frame).expect("write");
15136
15137        // Golden reference: columns in BTreeMap order; Bool(true) coerced to Float64
15138        // in column c (which has Float64 + Bool → Float64), so true → 1.0. A Float64
15139        // column writes whole values with a trailing ".0" like pandas (str(float)).
15140        let expected = "a,b,c\n1,hello,3.14\n2,,1.0\n3,world,\n";
15141        assert_eq!(
15142            output, expected,
15143            "output does not match golden reference.\nGot:\n{output}\nExpected:\n{expected}"
15144        );
15145        eprintln!("[TEST] test_csv_golden_output | golden_match=true | PASS");
15146    }
15147
15148    // === bd-2gi.19: IO Complete Contract Tests ===
15149
15150    use super::{CsvOnBadLines, CsvReadOptions, read_csv_with_options};
15151
15152    #[test]
15153    fn csv_with_custom_delimiter() {
15154        let input = "a\tb\tc\n1\t2\t3\n4\t5\t6\n";
15155        let opts = CsvReadOptions {
15156            delimiter: b'\t',
15157            ..Default::default()
15158        };
15159        let frame = read_csv_with_options(input, &opts).expect("parse tsv");
15160        assert_eq!(frame.index().len(), 2);
15161        assert_eq!(frame.column("a").unwrap().values()[0], Scalar::Int64(1));
15162    }
15163
15164    #[test]
15165    fn csv_without_headers_generates_default_names_and_keeps_first_row() {
15166        let input = "1,2\n3,4\n";
15167        let opts = CsvReadOptions {
15168            has_headers: false,
15169            ..Default::default()
15170        };
15171        let frame = read_csv_with_options(input, &opts).expect("parse");
15172        assert_eq!(frame.index().len(), 2);
15173        assert_eq!(
15174            frame.column("column_0").unwrap().values()[0],
15175            Scalar::Int64(1)
15176        );
15177        assert_eq!(
15178            frame.column("column_1").unwrap().values()[0],
15179            Scalar::Int64(2)
15180        );
15181        assert_eq!(
15182            frame.column("column_0").unwrap().values()[1],
15183            Scalar::Int64(3)
15184        );
15185        assert_eq!(
15186            frame.column("column_1").unwrap().values()[1],
15187            Scalar::Int64(4)
15188        );
15189    }
15190
15191    #[test]
15192    fn csv_usecols_missing_column_errors() {
15193        let input = "a,b\n1,2\n";
15194        let opts = CsvReadOptions {
15195            usecols: Some(vec!["c".to_string()]),
15196            ..Default::default()
15197        };
15198        let err = read_csv_with_options(input, &opts).expect_err("missing usecols");
15199        assert!(
15200            matches!(err, IoError::MissingUsecols(missing) if missing == vec!["c".to_string()])
15201        );
15202    }
15203
15204    #[test]
15205    fn csv_without_headers_supports_generated_index_col_name() {
15206        let input = "10,alpha\n20,beta\n";
15207        let opts = CsvReadOptions {
15208            has_headers: false,
15209            index_col: Some("column_0".into()),
15210            ..Default::default()
15211        };
15212        let frame = read_csv_with_options(input, &opts).expect("parse");
15213        assert_eq!(frame.index().len(), 2);
15214        assert_eq!(frame.index().labels()[0], IndexLabel::Int64(10));
15215        assert_eq!(frame.index().labels()[1], IndexLabel::Int64(20));
15216        assert!(frame.column("column_0").is_none());
15217        assert_eq!(
15218            frame.column("column_1").unwrap().values()[0],
15219            Scalar::Utf8("alpha".into())
15220        );
15221        assert_eq!(
15222            frame.column("column_1").unwrap().values()[1],
15223            Scalar::Utf8("beta".into())
15224        );
15225    }
15226
15227    #[test]
15228    fn csv_with_na_values() {
15229        let input = "a,b\n1,NA\n2,n/a\n3,valid\n";
15230        let opts = CsvReadOptions {
15231            na_values: vec!["NA".into(), "n/a".into()],
15232            ..Default::default()
15233        };
15234        let frame = read_csv_with_options(input, &opts).expect("parse");
15235        let b = frame.column("b").unwrap();
15236        assert!(b.values()[0].is_missing());
15237        assert!(b.values()[1].is_missing());
15238        assert_eq!(b.values()[2], Scalar::Utf8("valid".into()));
15239    }
15240
15241    #[test]
15242    fn csv_none_is_default_na() {
15243        // "None" is a pandas default NA value (Python's None)
15244        let input = "a,b\n1,None\n2,valid\n";
15245        let frame = read_csv_str(input).expect("parse");
15246        let b = frame.column("b").unwrap();
15247        assert!(b.values()[0].is_missing(), "None should be parsed as NA");
15248        assert_eq!(b.values()[1], Scalar::Utf8("valid".into()));
15249    }
15250
15251    #[test]
15252    fn csv_scalar_inference_matches_pandas_2_2_3() {
15253        // Per-cell type inference verified against pandas 2.2.3 read_csv.
15254        let cell = |csv: &str| {
15255            let frame = read_csv_str(&format!("x\n{csv}\n")).expect("parse");
15256            frame.column("x").unwrap().values()[0].clone()
15257        };
15258        // Signed / leading-zero integers parse as Int64 (Rust + pandas agree).
15259        assert_eq!(cell("+1"), Scalar::Int64(1));
15260        assert_eq!(cell("01"), Scalar::Int64(1));
15261        assert_eq!(cell("-5"), Scalar::Int64(-5));
15262        // Scientific notation is float64 in pandas.
15263        assert_eq!(cell("1e3"), Scalar::Float64(1000.0));
15264        // inf / -inf are float values (NOT default-NA tokens, unlike nan).
15265        assert_eq!(cell("inf"), Scalar::Float64(f64::INFINITY));
15266        assert_eq!(cell("-inf"), Scalar::Float64(f64::NEG_INFINITY));
15267        // Bool inference is case-insensitive in pandas 2.2.3.
15268        assert_eq!(cell("TRUE"), Scalar::Bool(true));
15269        assert_eq!(cell("true"), Scalar::Bool(true));
15270        assert_eq!(cell("False"), Scalar::Bool(false));
15271        // Surrounding whitespace is trimmed ONLY for numeric inference.
15272        assert_eq!(cell(" 1 "), Scalar::Int64(1));
15273        assert_eq!(cell("  3.5  "), Scalar::Float64(3.5));
15274        // Non-numeric, non-bool stays Utf8.
15275        assert_eq!(cell("hello"), Scalar::Utf8("hello".into()));
15276        // NA markers, booleans, and plain strings keep their surrounding
15277        // whitespace — a padded value is a STRING, not null/bool. Verified vs
15278        // live pandas 2.2.3: " abc "/"true "/" NA " all stay object strings.
15279        assert_eq!(cell(" abc "), Scalar::Utf8(" abc ".into()));
15280        assert_eq!(cell("true "), Scalar::Utf8("true ".into()));
15281        assert_eq!(cell(" True "), Scalar::Utf8(" True ".into()));
15282        assert_eq!(cell(" NA "), Scalar::Utf8(" NA ".into()));
15283        assert!(!matches!(cell(" NA "), Scalar::Null(_)));
15284    }
15285
15286    #[test]
15287    fn json_write_non_finite_floats_as_null_like_pandas() {
15288        // pandas to_json(orient="records") converts inf / -inf / NaN to JSON
15289        // `null` (JSON has no inf/nan literals). Verified vs pandas 2.2.3:
15290        // read_csv("x\n1.5\ninf\n-inf\n").to_json(orient="records")
15291        //   == [{"x":1.5},{"x":null},{"x":null}]
15292        let frame = read_csv_str("x\n1.5\ninf\n-inf\n").expect("parse");
15293        let json = write_json_string(&frame, JsonOrient::Records).expect("json");
15294        assert_eq!(json, r#"[{"x":1.5},{"x":null},{"x":null}]"#);
15295    }
15296
15297    #[test]
15298    fn csv_default_na_token_set_matches_pandas_table() {
15299        let default_tokens = [
15300            "", "#N/A", "#N/A N/A", "#NA", "-1.#IND", "-1.#QNAN", "-NaN", "-nan", "1.#IND",
15301            "1.#QNAN", "<NA>", "N/A", "NA", "NULL", "NaN", "None", "n/a", "nan", "null",
15302        ];
15303        for token in default_tokens {
15304            assert!(super::is_pandas_default_na(token), "{token:?}");
15305        }
15306
15307        for token in ["none", "NAN", "n/a ", " NULL", "0", "false"] {
15308            assert!(!super::is_pandas_default_na(token), "{token:?}");
15309        }
15310    }
15311
15312    #[test]
15313    fn csv_keep_default_na_false() {
15314        // With keep_default_na=false, only custom na_values are recognized
15315        let input = "a,b\n1,NA\n2,CUSTOM\n3,valid\n";
15316        let opts = CsvReadOptions {
15317            na_values: vec!["CUSTOM".into()],
15318            keep_default_na: false,
15319            ..Default::default()
15320        };
15321        let frame = read_csv_with_options(input, &opts).expect("parse");
15322        let b = frame.column("b").unwrap();
15323        // "NA" should NOT be missing because keep_default_na=false
15324        assert_eq!(b.values()[0], Scalar::Utf8("NA".into()));
15325        // "CUSTOM" should be missing because it's in na_values
15326        assert!(b.values()[1].is_missing());
15327        assert_eq!(b.values()[2], Scalar::Utf8("valid".into()));
15328    }
15329
15330    #[test]
15331    fn csv_na_filter_false() {
15332        // With na_filter=false, no NA detection at all (for performance)
15333        let input = "a,b\n1,NA\n2,\n3,None\n";
15334        let opts = CsvReadOptions {
15335            na_filter: false,
15336            ..Default::default()
15337        };
15338        let frame = read_csv_with_options(input, &opts).expect("parse");
15339        let b = frame.column("b").unwrap();
15340        // All values should be kept as strings, no NA detection
15341        assert_eq!(b.values()[0], Scalar::Utf8("NA".into()));
15342        assert_eq!(b.values()[1], Scalar::Utf8("".into()));
15343        assert_eq!(b.values()[2], Scalar::Utf8("None".into()));
15344    }
15345
15346    #[test]
15347    fn csv_with_index_col() {
15348        let input = "id,val\na,10\nb,20\nc,30\n";
15349        let opts = CsvReadOptions {
15350            index_col: Some("id".into()),
15351            ..Default::default()
15352        };
15353        let frame = read_csv_with_options(input, &opts).expect("parse");
15354        assert_eq!(frame.index().len(), 3);
15355        assert_eq!(
15356            frame.index().labels()[0],
15357            fp_index::IndexLabel::Utf8("a".into())
15358        );
15359        assert!(frame.column("id").is_none());
15360        assert_eq!(frame.column("val").unwrap().values()[0], Scalar::Int64(10));
15361    }
15362
15363    #[test]
15364    fn csv_with_missing_index_col_errors() {
15365        let input = "id,val\na,10\nb,20\n";
15366        let opts = CsvReadOptions {
15367            index_col: Some("missing".into()),
15368            ..Default::default()
15369        };
15370
15371        let err = read_csv_with_options(input, &opts).expect_err("missing index_col should error");
15372        assert!(
15373            matches!(&err, IoError::MissingIndexColumn(name) if name == "missing"),
15374            "expected MissingIndexColumn(\"missing\"), got {err:?}"
15375        );
15376    }
15377
15378    #[test]
15379    fn csv_with_malformed_row_errors() {
15380        let input = "a,b\n1,2\n3\n";
15381        let opts = CsvReadOptions::default();
15382
15383        let err = read_csv_with_options(input, &opts).expect_err("malformed CSV row should error");
15384        assert!(
15385            matches!(&err, IoError::Csv(_)),
15386            "expected CSV parser error for ragged row, got {err:?}"
15387        );
15388    }
15389
15390    #[test]
15391    fn csv_on_bad_lines_skip_skips_extra_field_rows() {
15392        let input = "a,b\n1,2\n3,4,5\n6,7\n";
15393        let opts = CsvReadOptions {
15394            on_bad_lines: CsvOnBadLines::Skip,
15395            ..Default::default()
15396        };
15397
15398        let frame = read_csv_with_options(input, &opts).expect("parse with skipped bad line");
15399        assert_eq!(frame.index().len(), 2);
15400        assert_eq!(frame.column("a").unwrap().values()[0], Scalar::Int64(1));
15401        assert_eq!(frame.column("b").unwrap().values()[0], Scalar::Int64(2));
15402        assert_eq!(frame.column("a").unwrap().values()[1], Scalar::Int64(6));
15403        assert_eq!(frame.column("b").unwrap().values()[1], Scalar::Int64(7));
15404    }
15405
15406    #[test]
15407    fn csv_on_bad_lines_warn_skips_extra_field_rows() {
15408        let input = "a,b\n1,2\n3,4,5\n6,7\n";
15409        let opts = CsvReadOptions {
15410            on_bad_lines: CsvOnBadLines::Warn,
15411            ..Default::default()
15412        };
15413
15414        let frame = read_csv_with_options(input, &opts).expect("parse with warned bad line");
15415        assert_eq!(frame.index().len(), 2);
15416        assert_eq!(frame.column("a").unwrap().values()[0], Scalar::Int64(1));
15417        assert_eq!(frame.column("b").unwrap().values()[1], Scalar::Int64(7));
15418    }
15419
15420    #[test]
15421    fn csv_on_bad_lines_skip_preserves_short_rows_as_missing() {
15422        // DISC-011: Int64 columns with missing values stay Int64 (extension dtype parity).
15423        // Missing values use NullKind::Null (pd.NA semantics) not NullKind::NaN.
15424        let input = "a,b\n1,2\n3\n6,7\n";
15425        let opts = CsvReadOptions {
15426            on_bad_lines: CsvOnBadLines::Skip,
15427            ..Default::default()
15428        };
15429
15430        let frame = read_csv_with_options(input, &opts).expect("parse short row");
15431        assert_eq!(frame.index().len(), 3);
15432        assert_eq!(frame.column("a").unwrap().values()[1], Scalar::Int64(3));
15433        assert_eq!(
15434            frame.column("b").unwrap().values()[1],
15435            Scalar::Null(NullKind::Null)
15436        );
15437    }
15438
15439    #[test]
15440    fn json_temporal_values_and_index_are_epoch_millis() {
15441        // pandas to_json (date_unit='ms') serializes datetime64/timedelta64 as
15442        // epoch-millisecond integers for both values and index — not ISO/format
15443        // strings (values) nor raw nanoseconds (index). (br-frankenpandas-lb0iu)
15444        // 2020-01-01T00:00:00 == 1_577_836_800_000 ms; 1s timedelta == 1000 ms.
15445        let dt_ns = 1_577_836_800_000_000_000_i64;
15446        let mut columns = BTreeMap::new();
15447        columns.insert(
15448            "d".to_owned(),
15449            Column::from_values(vec![Scalar::Datetime64(dt_ns)]).expect("d"),
15450        );
15451        columns.insert(
15452            "t".to_owned(),
15453            Column::from_values(vec![Scalar::Timedelta64(1_000_000_000)]).expect("t"),
15454        );
15455        let index = Index::new(vec![IndexLabel::Datetime64(dt_ns)]);
15456        let frame =
15457            DataFrame::new_with_column_order(index, columns, vec!["d".to_owned(), "t".to_owned()])
15458                .expect("frame");
15459
15460        let out = write_json_string(&frame, JsonOrient::Columns).expect("json");
15461        assert!(
15462            out.contains("1577836800000"),
15463            "datetime value/index should be epoch-millis int, got {out}"
15464        );
15465        assert!(
15466            out.contains("1000"),
15467            "timedelta value should be epoch-millis int (1000), got {out}"
15468        );
15469        // Must NOT contain the old ISO string or raw-nanosecond forms.
15470        assert!(
15471            !out.contains("2020-01-01"),
15472            "should not emit ISO string: {out}"
15473        );
15474        assert!(
15475            !out.contains("1577836800000000000"),
15476            "should not emit raw nanoseconds: {out}"
15477        );
15478    }
15479
15480    #[test]
15481    fn json_records_read_write_roundtrip() {
15482        let input = r#"[{"name":"Alice","age":30},{"name":"Bob","age":25}]"#;
15483        let frame = read_json_str(input, JsonOrient::Records).expect("read json records");
15484        assert_eq!(frame.index().len(), 2);
15485        assert_eq!(
15486            frame.column("name").unwrap().values()[0],
15487            Scalar::Utf8("Alice".into())
15488        );
15489        assert_eq!(frame.column("age").unwrap().values()[1], Scalar::Int64(25));
15490
15491        let output = write_json_string(&frame, JsonOrient::Records).expect("write");
15492        let frame2 = read_json_str(&output, JsonOrient::Records).expect("re-read");
15493        assert_eq!(frame2.index().len(), 2);
15494    }
15495
15496    #[test]
15497    fn json_records_nullable_int_roundtrip_is_stable() {
15498        let input = r#"[{"city":"Boston","temp":72},{"city":"Paris","temp":null}]"#;
15499        let frame = read_json_str(input, JsonOrient::Records).expect("read json records");
15500        let output = write_json_string(&frame, JsonOrient::Records).expect("write records");
15501        let frame2 = read_json_str(&output, JsonOrient::Records).expect("re-read records");
15502
15503        assert!(frame.equals(&frame2));
15504    }
15505
15506    #[test]
15507    fn json_records_preserves_column_order() {
15508        let input = r#"[{"b":1,"a":2},{"c":3}]"#;
15509        let frame = read_json_str(input, JsonOrient::Records).expect("read json records");
15510        let order: Vec<&str> = frame
15511            .column_names()
15512            .iter()
15513            .map(|name| name.as_str())
15514            .collect();
15515        assert_eq!(order, vec!["b", "a", "c"]);
15516    }
15517
15518    #[test]
15519    fn json_columns_read_write_roundtrip() {
15520        let input = r#"{"name":{"row_a":"Alice","row_b":"Bob"},"age":{"row_a":30,"row_b":25}}"#;
15521        let frame = read_json_str(input, JsonOrient::Columns).expect("read json columns");
15522        assert_eq!(frame.index().len(), 2);
15523        assert_eq!(frame.index().labels()[0], IndexLabel::Utf8("row_a".into()));
15524
15525        let output = write_json_string(&frame, JsonOrient::Columns).expect("write");
15526        let frame2 = read_json_str(&output, JsonOrient::Columns).expect("re-read");
15527        assert_eq!(frame2.index().labels(), frame.index().labels());
15528    }
15529
15530    #[test]
15531    fn json_columns_write_duplicate_index_rejects() {
15532        let index = Index::new(vec![IndexLabel::Int64(1), IndexLabel::Utf8("1".into())]);
15533        let mut columns = BTreeMap::new();
15534        columns.insert(
15535            "v".into(),
15536            Column::from_values(vec![Scalar::Int64(10), Scalar::Int64(20)]).expect("col"),
15537        );
15538        let frame = DataFrame::new(index, columns).expect("frame");
15539
15540        let err = write_json_string(&frame, JsonOrient::Columns)
15541            .expect_err("duplicate JSON object keys should reject");
15542        assert!(
15543            matches!(&err, IoError::JsonFormat(msg) if msg.contains("duplicate index label key")),
15544            "expected duplicate-index-key JsonFormat, got {err:?}"
15545        );
15546    }
15547
15548    #[test]
15549    fn json_split_read_write_roundtrip() {
15550        let input = r#"{"columns":["x","y"],"index":["r1","r2","r3"],"data":[[1,4],[2,5],[3,6]]}"#;
15551        let frame = read_json_str(input, JsonOrient::Split).expect("read json split");
15552        assert_eq!(frame.index().len(), 3);
15553        assert_eq!(
15554            frame.index().labels()[0],
15555            fp_index::IndexLabel::Utf8("r1".into())
15556        );
15557        assert_eq!(frame.column("x").unwrap().values()[0], Scalar::Int64(1));
15558        assert_eq!(frame.column("y").unwrap().values()[2], Scalar::Int64(6));
15559
15560        let output = write_json_string(&frame, JsonOrient::Split).expect("write");
15561        let frame2 = read_json_str(&output, JsonOrient::Split).expect("re-read");
15562        assert_eq!(frame2.index().len(), 3);
15563        assert_eq!(frame2.index().labels(), frame.index().labels());
15564    }
15565
15566    #[test]
15567    fn json_records_multiindex_roundtrip_restores_logical_row_axis() {
15568        let frame = make_row_multiindex_test_dataframe();
15569        let json = write_json_string(&frame, JsonOrient::Records).expect("write");
15570        let roundtrip = read_json_str(&json, JsonOrient::Records).expect("read");
15571
15572        assert!(roundtrip.equals(&frame));
15573        assert!(roundtrip.row_multiindex().is_some());
15574        assert!(roundtrip.column("__index_level_0__").is_none());
15575    }
15576
15577    #[test]
15578    fn json_split_multiindex_roundtrip_restores_logical_row_axis() {
15579        let frame = make_row_multiindex_test_dataframe();
15580        let json = write_json_string(&frame, JsonOrient::Split).expect("write");
15581        let roundtrip = read_json_str(&json, JsonOrient::Split).expect("read");
15582
15583        assert!(roundtrip.equals(&frame));
15584        assert!(roundtrip.row_multiindex().is_some());
15585        assert!(roundtrip.column("__index_level_0__").is_none());
15586    }
15587
15588    #[test]
15589    fn json_split_without_index_defaults_to_range_index() {
15590        let input = r#"{"columns":["x"],"data":[[10],[20]]}"#;
15591        let frame = read_json_str(input, JsonOrient::Split).expect("read json split");
15592        assert_eq!(frame.index().labels()[0], fp_index::IndexLabel::Int64(0));
15593        assert_eq!(frame.index().labels()[1], fp_index::IndexLabel::Int64(1));
15594    }
15595
15596    #[test]
15597    fn json_split_index_length_mismatch_errors() {
15598        let input = r#"{"columns":["x"],"index":[0],"data":[[1],[2]]}"#;
15599        let err = read_json_str(input, JsonOrient::Split)
15600            .expect_err("split orient index/data length mismatch should error");
15601        assert!(
15602            matches!(&err, IoError::JsonFormat(msg) if msg.contains("index length")),
15603            "expected split index length error, got {err:?}"
15604        );
15605    }
15606
15607    #[test]
15608    fn json_split_row_length_mismatch_errors() {
15609        let input = r#"{"columns":["x","y"],"data":[[1],[2,3]]}"#;
15610        let err = read_json_str(input, JsonOrient::Split)
15611            .expect_err("split orient row length mismatch should error");
15612        assert!(
15613            matches!(&err, IoError::JsonFormat(msg) if msg.contains("row 0 length")),
15614            "expected split row length error, got {err:?}"
15615        );
15616    }
15617
15618    #[test]
15619    fn json_split_non_string_columns_are_stringified() {
15620        let input = r#"{"columns":[1,true,null,"name"],"data":[[10,20,30,40]]}"#;
15621        let frame = read_json_str(input, JsonOrient::Split).expect("read json split");
15622        assert_eq!(frame.column("1").unwrap().values()[0], Scalar::Int64(10));
15623        assert_eq!(frame.column("true").unwrap().values()[0], Scalar::Int64(20));
15624        assert_eq!(frame.column("null").unwrap().values()[0], Scalar::Int64(30));
15625        assert_eq!(frame.column("name").unwrap().values()[0], Scalar::Int64(40));
15626    }
15627
15628    #[test]
15629    fn json_split_duplicate_column_names_error() {
15630        let input = r#"{"columns":[1,"1"],"data":[[10,20]]}"#;
15631        let err = read_json_str(input, JsonOrient::Split).expect_err("dup columns");
15632        assert!(matches!(err, IoError::DuplicateColumnName(name) if name == "1"));
15633    }
15634
15635    #[test]
15636    fn json_index_read_write_roundtrip() {
15637        let input = r#"{"row_a":{"name":"Alice","age":30},"row_b":{"name":"Bob","age":25}}"#;
15638        let frame = read_json_str(input, JsonOrient::Index).expect("read json index");
15639        assert_eq!(frame.index().len(), 2);
15640        assert_eq!(frame.index().labels()[0], IndexLabel::Utf8("row_a".into()));
15641        assert_eq!(
15642            frame.column("name").unwrap().values()[1],
15643            Scalar::Utf8("Bob".into())
15644        );
15645
15646        let output = write_json_string(&frame, JsonOrient::Index).expect("write");
15647        let frame2 = read_json_str(&output, JsonOrient::Index).expect("re-read");
15648        assert_eq!(frame2.index().labels(), frame.index().labels());
15649        assert_eq!(frame2.column("age").unwrap().values()[0], Scalar::Int64(30));
15650    }
15651
15652    #[test]
15653    fn json_index_preserves_column_order() {
15654        let input = r#"{"r1":{"b":1,"a":2},"r2":{"c":3}}"#;
15655        let frame = read_json_str(input, JsonOrient::Index).expect("parse");
15656        let order: Vec<&str> = frame
15657            .column_names()
15658            .iter()
15659            .map(|name| name.as_str())
15660            .collect();
15661        assert_eq!(order, vec!["b", "a", "c"]);
15662    }
15663
15664    #[test]
15665    fn json_index_missing_columns_null_fill() {
15666        let input = r#"{"r1":{"a":1},"r2":{"b":2}}"#;
15667        let frame = read_json_str(input, JsonOrient::Index).expect("parse");
15668        let a = frame.column("a").expect("a");
15669        let b = frame.column("b").expect("b");
15670
15671        assert_eq!(a.values()[0], Scalar::Float64(1.0));
15672        assert!(a.values()[1].is_missing());
15673        assert!(b.values()[0].is_missing());
15674        assert_eq!(b.values()[1], Scalar::Float64(2.0));
15675    }
15676
15677    #[test]
15678    fn json_index_write_duplicate_index_rejects() {
15679        let index = Index::new(vec![IndexLabel::Int64(1), IndexLabel::Utf8("1".into())]);
15680        let mut columns = BTreeMap::new();
15681        columns.insert(
15682            "v".into(),
15683            Column::from_values(vec![Scalar::Int64(10), Scalar::Int64(20)]).expect("col"),
15684        );
15685        let frame = DataFrame::new(index, columns).expect("frame");
15686
15687        let err = write_json_string(&frame, JsonOrient::Index)
15688            .expect_err("duplicate JSON object keys should reject");
15689        assert!(
15690            matches!(&err, IoError::JsonFormat(msg) if msg.contains("duplicate index label key")),
15691            "expected duplicate-index-key JsonFormat, got {err:?}"
15692        );
15693    }
15694
15695    #[test]
15696    fn json_index_read_non_object_row_rejects() {
15697        let input = r#"{"r1":{"a":1},"r2":[1,2]}"#;
15698        let err = read_json_str(input, JsonOrient::Index)
15699            .expect_err("index orient rows must be JSON objects");
15700        assert!(
15701            matches!(&err, IoError::JsonFormat(msg) if msg.contains("rows must be objects")),
15702            "expected row-object error, got {err:?}"
15703        );
15704    }
15705
15706    #[test]
15707    fn json_values_read_write_roundtrip() {
15708        let input = r#"[[1,"Alice"],[null,"Bob"]]"#;
15709        let frame = read_json_str(input, JsonOrient::Values).expect("read json values");
15710        assert_eq!(frame.index().len(), 2);
15711        assert_eq!(frame.column_names(), vec!["0", "1"]);
15712        assert_eq!(frame.column("0").unwrap().values()[0], Scalar::Float64(1.0));
15713        assert_eq!(
15714            frame.column("1").unwrap().values()[1],
15715            Scalar::Utf8("Bob".into())
15716        );
15717
15718        let output = write_json_string(&frame, JsonOrient::Values).expect("write");
15719        let frame2 = read_json_str(&output, JsonOrient::Values).expect("re-read");
15720        assert_eq!(frame2.index().len(), 2);
15721        assert_eq!(frame2.column_names(), frame.column_names());
15722        assert_eq!(
15723            frame2.column("0").unwrap().values(),
15724            frame.column("0").unwrap().values()
15725        );
15726        assert_eq!(
15727            frame2.column("1").unwrap().values(),
15728            frame.column("1").unwrap().values()
15729        );
15730    }
15731
15732    #[test]
15733    fn json_records_with_nulls() {
15734        let input = r#"[{"a":1,"b":null},{"a":null,"b":"hello"}]"#;
15735        let frame = read_json_str(input, JsonOrient::Records).expect("parse");
15736        assert!(frame.column("a").unwrap().values()[1].is_missing());
15737        assert!(frame.column("b").unwrap().values()[0].is_missing());
15738    }
15739
15740    #[test]
15741    fn json_read_accepts_pandas_bare_nan_tokens() {
15742        let cases = [
15743            (JsonOrient::Records, r#"[{"a":NaN}]"#),
15744            (JsonOrient::Columns, r#"{"a":{"0":NaN}}"#),
15745            (
15746                JsonOrient::Split,
15747                r#"{"columns":["a"],"index":[0],"data":[[NaN]]}"#,
15748            ),
15749            (JsonOrient::Values, r#"[[NaN]]"#),
15750        ];
15751
15752        for (orient, input) in cases {
15753            let frame = read_json_str(input, orient).expect("parse bare NaN");
15754            let column_name = if orient == JsonOrient::Values {
15755                "0"
15756            } else {
15757                "a"
15758            };
15759            assert!(frame.column(column_name).unwrap().values()[0].is_missing());
15760        }
15761    }
15762
15763    #[test]
15764    fn json_records_write_preserves_nullable_int_column() {
15765        // DISC-011: Nullable extension Int64 dtype parity - Int64 preserved, not promoted to Float64.
15766        let frame = DataFrame::from_dict_with_index(
15767            vec![("a", vec![Scalar::Int64(1), Scalar::Null(NullKind::Null)])],
15768            vec!["row".into(), "row".into()],
15769        )
15770        .unwrap();
15771        let json = write_json_string(&frame, JsonOrient::Records).expect("write");
15772        let parsed: serde_json::Value = serde_json::from_str(&json).unwrap();
15773        assert_eq!(parsed, serde_json::json!([{"a": 1}, {"a": null}]));
15774    }
15775
15776    #[test]
15777    fn json_non_records_nullable_int_reads_promote_to_float() {
15778        let cases = [
15779            (JsonOrient::Columns, r#"{"a":{"0":1,"1":null}}"#),
15780            (JsonOrient::Index, r#"{"0":{"a":1},"1":{"a":null}}"#),
15781            (
15782                JsonOrient::Split,
15783                r#"{"columns":["a"],"index":[0,1],"data":[[1],[null]]}"#,
15784            ),
15785            (JsonOrient::Values, r#"[[1],[null]]"#),
15786        ];
15787
15788        for (orient, input) in cases {
15789            let frame = read_json_str(input, orient).expect("read json");
15790            let column_name = if orient == JsonOrient::Values {
15791                "0"
15792            } else {
15793                "a"
15794            };
15795            let values = frame.column(column_name).expect("column").values();
15796            assert_eq!(values[0], Scalar::Float64(1.0));
15797            assert!(matches!(values[1], Scalar::Null(NullKind::NaN)));
15798        }
15799    }
15800
15801    #[test]
15802    fn json_non_records_nullable_int_writes_preserve_int() {
15803        // DISC-011: Nullable extension Int64 dtype parity - Int64 preserved, not promoted to Float64.
15804        let frame = DataFrame::from_dict(
15805            &["a"],
15806            vec![("a", vec![Scalar::Int64(1), Scalar::Null(NullKind::Null)])],
15807        )
15808        .unwrap();
15809
15810        let columns_json: serde_json::Value =
15811            serde_json::from_str(&write_json_string(&frame, JsonOrient::Columns).unwrap()).unwrap();
15812        assert_eq!(columns_json, serde_json::json!({"a": {"0": 1, "1": null}}));
15813
15814        let index_json: serde_json::Value =
15815            serde_json::from_str(&write_json_string(&frame, JsonOrient::Index).unwrap()).unwrap();
15816        assert_eq!(
15817            index_json,
15818            serde_json::json!({"0": {"a": 1}, "1": {"a": null}})
15819        );
15820
15821        let split_json: serde_json::Value =
15822            serde_json::from_str(&write_json_string(&frame, JsonOrient::Split).unwrap()).unwrap();
15823        assert_eq!(
15824            split_json,
15825            serde_json::json!({"columns": ["a"], "index": [0, 1], "data": [[1], [null]]})
15826        );
15827
15828        let values_json: serde_json::Value =
15829            serde_json::from_str(&write_json_string(&frame, JsonOrient::Values).unwrap()).unwrap();
15830        assert_eq!(values_json, serde_json::json!([[1], [null]]));
15831    }
15832
15833    #[test]
15834    fn json_records_empty_array() {
15835        let input = r#"[]"#;
15836        let frame = read_json_str(input, JsonOrient::Records).expect("parse");
15837        assert_eq!(frame.index().len(), 0);
15838    }
15839
15840    #[test]
15841    fn json_records_mixed_numeric_coerces() {
15842        let input = r#"[{"v":1},{"v":2.5},{"v":true}]"#;
15843        let frame = read_json_str(input, JsonOrient::Records).expect("parse");
15844        // Int64 + Float64 + Bool all coerce to Float64
15845        assert_eq!(frame.column("v").unwrap().values()[0], Scalar::Float64(1.0));
15846        assert_eq!(frame.column("v").unwrap().values()[1], Scalar::Float64(2.5));
15847        assert_eq!(frame.column("v").unwrap().values()[2], Scalar::Float64(1.0));
15848    }
15849
15850    #[test]
15851    fn json_records_mixed_utf8_numeric_preserves_object_values() {
15852        let input = r#"[{"v":1},{"v":"text"}]"#;
15853        let frame = read_json_str(input, JsonOrient::Records).expect("parse");
15854        assert_eq!(
15855            frame.column("v").unwrap().values(),
15856            &[Scalar::Int64(1), Scalar::Utf8("text".into())]
15857        );
15858    }
15859
15860    #[test]
15861    fn file_csv_roundtrip() {
15862        let input = "a,b\n1,2\n3,4\n";
15863        let frame = read_csv_str(input).expect("parse");
15864
15865        let dir = std::env::temp_dir();
15866        let path = dir.join("fp_io_test_roundtrip.csv");
15867        super::write_csv(&frame, &path).expect("write file");
15868        let frame2 = super::read_csv(&path).expect("read file");
15869        assert_eq!(frame2.index().len(), 2);
15870        std::fs::remove_file(&path).ok();
15871    }
15872
15873    #[test]
15874    fn file_csv_with_options_path() {
15875        // DISC-011: Nullable extension Int64 dtype parity - Int64 preserved, not promoted to Float64.
15876        let input = "id\tval\na\tNA\nb\t2\n";
15877        let dir = std::env::temp_dir();
15878        let path = dir.join("fp_io_test_options.csv");
15879        std::fs::write(&path, input).expect("write fixture");
15880
15881        let options = CsvReadOptions {
15882            delimiter: b'\t',
15883            na_values: vec!["NA".into()],
15884            index_col: Some("id".into()),
15885            ..Default::default()
15886        };
15887
15888        let frame = super::read_csv_with_options_path(&path, &options).expect("read with options");
15889        assert_eq!(
15890            frame.index().labels()[0],
15891            fp_index::IndexLabel::Utf8("a".into())
15892        );
15893        assert!(frame.column("id").is_none());
15894        assert!(frame.column("val").unwrap().values()[0].is_missing());
15895        assert_eq!(frame.column("val").unwrap().values()[1], Scalar::Int64(2));
15896
15897        std::fs::remove_file(&path).ok();
15898    }
15899
15900    #[test]
15901    fn file_json_roundtrip() {
15902        let input = r#"[{"x":1},{"x":2}]"#;
15903        let frame = read_json_str(input, JsonOrient::Records).expect("parse");
15904
15905        let dir = std::env::temp_dir();
15906        let path = dir.join("fp_io_test_roundtrip.json");
15907        super::write_json(&frame, &path, JsonOrient::Records).expect("write file");
15908        let frame2 = super::read_json(&path, JsonOrient::Records).expect("read file");
15909        assert_eq!(frame2.index().len(), 2);
15910        std::fs::remove_file(&path).ok();
15911    }
15912
15913    // ── read_table 4pwr9 ───────────────────────────────────────────────
15914
15915    #[test]
15916    fn read_table_str_parses_tab_separated_4pwr9() {
15917        let input = "a\tb\tc\n1\t2\t3\n4\t5\t6\n";
15918        let frame = super::read_table_str(input).expect("parse tsv");
15919        assert_eq!(frame.index().len(), 2);
15920        assert_eq!(frame.column("a").unwrap().values()[0], Scalar::Int64(1));
15921        assert_eq!(frame.column("c").unwrap().values()[1], Scalar::Int64(6));
15922    }
15923
15924    #[test]
15925    fn read_table_with_options_overrides_default_delimiter_4pwr9() {
15926        // DISC-011: Nullable extension Int64 dtype parity - Int64 preserved, not promoted to Float64.
15927        let input = "x\ty\n1\tNA\n2\t3\n";
15928        let opts = CsvReadOptions {
15929            na_values: vec!["NA".into()],
15930            ..Default::default()
15931        };
15932        let frame = super::read_table_with_options(input, &opts).expect("parse tsv with na");
15933        assert!(frame.column("y").unwrap().values()[0].is_missing());
15934        assert_eq!(frame.column("y").unwrap().values()[1], Scalar::Int64(3));
15935    }
15936
15937    #[test]
15938    fn read_table_with_options_honours_explicit_pipe_delimiter_4pwr9() {
15939        let input = "x|y\n1|2\n3|4\n";
15940        let opts = CsvReadOptions {
15941            delimiter: b'|',
15942            ..Default::default()
15943        };
15944        let frame = super::read_table_with_options(input, &opts).expect("parse pipe");
15945        assert_eq!(frame.column("x").unwrap().values()[0], Scalar::Int64(1));
15946        assert_eq!(frame.column("y").unwrap().values()[1], Scalar::Int64(4));
15947    }
15948
15949    // ── read_fwf 23n8u ─────────────────────────────────────────────────
15950
15951    #[test]
15952    fn read_fwf_str_with_colspecs_parses_aligned_records_23n8u() {
15953        let input = "name    age   active\nalice   30    true\nbob     25    false\n";
15954        let opts = super::FwfReadOptions {
15955            colspecs: Some(vec![(0, 8), (8, 14), (14, 20)]),
15956            true_values: vec!["true".into()],
15957            false_values: vec!["false".into()],
15958            ..Default::default()
15959        };
15960        let frame = super::read_fwf_str(input, &opts).expect("parse fwf");
15961        assert_eq!(frame.index().len(), 2);
15962        assert_eq!(
15963            frame.column("name").unwrap().values()[0],
15964            Scalar::Utf8("alice".into())
15965        );
15966        assert_eq!(frame.column("age").unwrap().values()[0], Scalar::Int64(30));
15967        assert_eq!(
15968            frame.column("active").unwrap().values()[0],
15969            Scalar::Bool(true)
15970        );
15971    }
15972
15973    #[test]
15974    fn read_fwf_str_with_widths_derives_colspecs_23n8u() {
15975        let input = "x  y \n1  2 \n3  4 \n";
15976        let opts = super::FwfReadOptions {
15977            widths: Some(vec![3, 3]),
15978            ..Default::default()
15979        };
15980        let frame = super::read_fwf_str(input, &opts).expect("parse fwf widths");
15981        assert_eq!(frame.column("x").unwrap().values()[0], Scalar::Int64(1));
15982        assert_eq!(frame.column("y").unwrap().values()[1], Scalar::Int64(4));
15983    }
15984
15985    #[test]
15986    fn read_fwf_str_threads_na_handling_23n8u() {
15987        // DISC-011: Nullable extension Int64 dtype parity - Int64 preserved, not promoted to Float64.
15988        let input = "id   val\nA    NA \nB    7  \n";
15989        let opts = super::FwfReadOptions {
15990            colspecs: Some(vec![(0, 5), (5, 9)]),
15991            na_values: vec!["NA".into()],
15992            ..Default::default()
15993        };
15994        let frame = super::read_fwf_str(input, &opts).expect("parse fwf na");
15995        let col = frame.column("val").unwrap().values();
15996        assert!(col[0].is_missing());
15997        assert_eq!(col[1], Scalar::Int64(7));
15998    }
15999
16000    #[test]
16001    fn read_fwf_rejects_both_colspecs_and_widths_23n8u() {
16002        let opts = super::FwfReadOptions {
16003            colspecs: Some(vec![(0, 3)]),
16004            widths: Some(vec![3]),
16005            ..Default::default()
16006        };
16007        let err = super::read_fwf_str("x\n1\n", &opts).expect_err("must reject");
16008        assert!(
16009            matches!(&err, super::IoError::Fwf(message) if message.contains("only one of")),
16010            "unexpected error: {err:?}"
16011        );
16012    }
16013
16014    #[test]
16015    fn read_fwf_infers_colspecs_when_specs_are_omitted_htdmp() {
16016        let opts = super::FwfReadOptions::default();
16017        let frame = super::read_fwf_str("a b\n1 2\n3 4\n", &opts).expect("infer fwf specs");
16018        assert_eq!(frame.column("a").unwrap().values()[0], Scalar::Int64(1));
16019        assert_eq!(frame.column("b").unwrap().values()[1], Scalar::Int64(4));
16020    }
16021
16022    #[test]
16023    fn read_fwf_infers_aligned_wide_colspecs_htdmp() {
16024        let input = "name    age   active\nalice   30    true\nbob     25    false\n";
16025        let opts = super::FwfReadOptions {
16026            true_values: vec!["true".into()],
16027            false_values: vec!["false".into()],
16028            ..Default::default()
16029        };
16030        let frame = super::read_fwf_str(input, &opts).expect("infer aligned fwf specs");
16031        assert_eq!(
16032            frame.column("name").unwrap().values()[0],
16033            Scalar::Utf8("alice".into())
16034        );
16035        assert_eq!(frame.column("age").unwrap().values()[1], Scalar::Int64(25));
16036        assert_eq!(
16037            frame.column("active").unwrap().values()[1],
16038            Scalar::Bool(false)
16039        );
16040    }
16041
16042    #[test]
16043    fn read_fwf_infer_honors_skiprows_and_skipfooter_htdmp() {
16044        let input = "ignored wide banner\nx y\n1 2\nfooter text ignored\n";
16045        let opts = super::FwfReadOptions {
16046            skiprows: 1,
16047            skipfooter: 1,
16048            ..Default::default()
16049        };
16050        let frame = super::read_fwf_str(input, &opts).expect("infer after skipping");
16051        assert_eq!(frame.column("x").unwrap().values()[0], Scalar::Int64(1));
16052        assert_eq!(frame.column("y").unwrap().values()[0], Scalar::Int64(2));
16053    }
16054
16055    // ── Deferred reader surfaces 2yy4d ─────────────────────────────────
16056
16057    #[test]
16058    fn read_clipboard_rejects_with_deferred_marker_2yy4d() {
16059        let err = super::read_clipboard().expect_err("must reject");
16060        assert!(
16061            matches!(&err, super::IoError::Deferred(message)
16062                if message.contains("read_clipboard") && message.contains("headless")),
16063            "unexpected error: {err:?}"
16064        );
16065    }
16066
16067    #[test]
16068    fn read_gbq_rejects_with_deferred_marker_2yy4d() {
16069        let err = super::read_gbq("SELECT 1", Some("proj")).expect_err("must reject");
16070        assert!(
16071            matches!(&err, super::IoError::Deferred(message)
16072                if message.contains("read_gbq") && message.contains("BigQuery")),
16073            "unexpected error: {err:?}"
16074        );
16075        let no_project_err = super::read_gbq("SELECT 1", None).expect_err("must reject");
16076        assert!(matches!(no_project_err, super::IoError::Deferred(_)));
16077    }
16078
16079    #[test]
16080    fn dataframe_deferred_writer_surfaces_report_method_names_e6jrk() {
16081        use super::DataFrameIoExt;
16082
16083        let frame = make_test_dataframe();
16084        let clipboard_err = frame
16085            .to_clipboard()
16086            .expect_err("must reject clipboard writer");
16087        assert!(
16088            matches!(&clipboard_err, super::IoError::Deferred(message) if message.contains("to_clipboard") && message.contains("headless"))
16089        );
16090
16091        let gbq_err = frame
16092            .to_gbq("dataset.table", Some("project"))
16093            .expect_err("must reject BigQuery writer");
16094        assert!(
16095            matches!(&gbq_err, super::IoError::Deferred(message) if message.contains("to_gbq") && message.contains("BigQuery"))
16096        );
16097
16098        let no_project_err = frame
16099            .to_gbq("dataset.table", None)
16100            .expect_err("must reject BigQuery writer without project");
16101        assert!(matches!(no_project_err, super::IoError::Deferred(_)));
16102    }
16103
16104    #[test]
16105    fn series_clipboard_writer_rejects_with_deferred_marker() {
16106        use super::SeriesIoExt;
16107
16108        let source = Series::from_values(
16109            "sales",
16110            vec!["r1".into(), "r2".into()],
16111            vec![Scalar::Int64(10), Scalar::Int64(12)],
16112        )
16113        .expect("source series");
16114        let err = source
16115            .to_clipboard()
16116            .expect_err("must reject series clipboard writer");
16117        assert!(
16118            matches!(&err, super::IoError::Deferred(message) if message.contains("to_clipboard") && message.contains("headless"))
16119        );
16120    }
16121
16122    #[test]
16123    fn read_sas_rejects_with_deferred_marker_2yy4d() {
16124        let path = std::path::Path::new("/nonexistent.sas7bdat");
16125        let err = super::read_sas(path).expect_err("must reject");
16126        assert!(
16127            matches!(&err, super::IoError::Deferred(message)
16128                if message.contains("read_sas") && message.contains("sas7bdat")),
16129            "unexpected error: {err:?}"
16130        );
16131    }
16132
16133    #[test]
16134    fn read_spss_rejects_with_deferred_marker_2yy4d() {
16135        let path = std::path::Path::new("/nonexistent.sav");
16136        let err = super::read_spss(path).expect_err("must reject");
16137        assert!(
16138            matches!(&err, super::IoError::Deferred(message)
16139                if message.contains("read_spss") && message.contains(".sav")),
16140            "unexpected error: {err:?}"
16141        );
16142    }
16143
16144    #[test]
16145    fn read_fwf_path_reads_fixed_width_file_23n8u() {
16146        let input = "a   b\n1   2\n3   4\n";
16147        let dir = std::env::temp_dir();
16148        let path = dir.join("fp_io_test_read_fwf_23n8u.txt");
16149        std::fs::write(&path, input).expect("write fixture");
16150
16151        let opts = super::FwfReadOptions {
16152            colspecs: Some(vec![(0, 4), (4, 5)]),
16153            ..Default::default()
16154        };
16155        let frame = super::read_fwf(&path, &opts).expect("read fwf path");
16156        assert_eq!(frame.index().len(), 2);
16157        assert_eq!(frame.column("a").unwrap().values()[1], Scalar::Int64(3));
16158        assert_eq!(frame.column("b").unwrap().values()[0], Scalar::Int64(2));
16159
16160        std::fs::remove_file(&path).ok();
16161    }
16162
16163    #[test]
16164    fn read_table_path_roundtrips_through_read_csv_path_4pwr9() {
16165        let input = "id\tval\na\t1\nb\t2\n";
16166        let dir = std::env::temp_dir();
16167        let path = dir.join("fp_io_test_read_table_4pwr9.tsv");
16168        std::fs::write(&path, input).expect("write fixture");
16169
16170        let frame = super::read_table(&path).expect("read tsv");
16171        assert_eq!(frame.index().len(), 2);
16172        assert_eq!(
16173            frame.column("id").unwrap().values()[0],
16174            Scalar::Utf8("a".into())
16175        );
16176        assert_eq!(frame.column("val").unwrap().values()[1], Scalar::Int64(2));
16177
16178        let opts = CsvReadOptions {
16179            index_col: Some("id".into()),
16180            ..Default::default()
16181        };
16182        let frame2 =
16183            super::read_table_with_options_path(&path, &opts).expect("read tsv with options");
16184        assert!(frame2.column("id").is_none());
16185        assert_eq!(
16186            frame2.index().labels()[0],
16187            fp_index::IndexLabel::Utf8("a".into())
16188        );
16189        assert_eq!(frame2.column("val").unwrap().values()[1], Scalar::Int64(2));
16190
16191        std::fs::remove_file(&path).ok();
16192    }
16193
16194    // ── Parquet I/O tests ──────────────────────────────────────────────
16195
16196    fn make_test_dataframe() -> DataFrame {
16197        use fp_types::DType;
16198
16199        let mut columns = BTreeMap::new();
16200        columns.insert(
16201            "ints".to_string(),
16202            Column::new(
16203                DType::Int64,
16204                vec![Scalar::Int64(10), Scalar::Int64(20), Scalar::Int64(30)],
16205            )
16206            .unwrap(),
16207        );
16208        columns.insert(
16209            "floats".to_string(),
16210            Column::new(
16211                DType::Float64,
16212                vec![
16213                    Scalar::Float64(1.5),
16214                    Scalar::Float64(2.5),
16215                    Scalar::Float64(3.5),
16216                ],
16217            )
16218            .unwrap(),
16219        );
16220        columns.insert(
16221            "names".to_string(),
16222            Column::from_values(vec![
16223                Scalar::Utf8("alice".into()),
16224                Scalar::Utf8("bob".into()),
16225                Scalar::Utf8("carol".into()),
16226            ])
16227            .unwrap(),
16228        );
16229
16230        let labels = vec![
16231            IndexLabel::Int64(0),
16232            IndexLabel::Int64(1),
16233            IndexLabel::Int64(2),
16234        ];
16235        DataFrame::new_with_column_order(
16236            Index::new(labels),
16237            columns,
16238            vec![
16239                "ints".to_string(),
16240                "floats".to_string(),
16241                "names".to_string(),
16242            ],
16243        )
16244        .unwrap()
16245    }
16246
16247    #[test]
16248    fn dataframe_io_ext_pandas_named_aliases_cover_supported_writers() {
16249        use super::DataFrameIoExt;
16250
16251        let frame = make_test_dataframe();
16252        let csv = frame.to_csv_string().expect("csv string");
16253        assert_eq!(csv, super::write_csv_string(&frame).expect("free csv"));
16254        assert_eq!(
16255            frame.to_markdown_string().expect("markdown string"),
16256            write_markdown_string(&frame).expect("free markdown")
16257        );
16258        assert_eq!(
16259            frame.to_latex_string().expect("latex string"),
16260            write_latex_string(&frame).expect("free latex")
16261        );
16262        let dir = std::env::temp_dir();
16263        let stem = format!("fp_io_dataframe_io_ext_{}", std::process::id());
16264        let excel_path = dir.join(format!("{stem}.xlsx"));
16265        let feather_path = dir.join(format!("{stem}.feather"));
16266        let parquet_path = dir.join(format!("{stem}.parquet"));
16267
16268        frame.to_excel(&excel_path).expect("to_excel alias");
16269        frame.to_feather(&feather_path).expect("to_feather alias");
16270        frame.to_parquet(&parquet_path).expect("to_parquet alias");
16271
16272        assert!(
16273            std::fs::metadata(&excel_path)
16274                .expect("excel metadata")
16275                .len()
16276                > 0
16277        );
16278        assert_eq!(
16279            super::read_feather(&feather_path)
16280                .expect("read feather")
16281                .index()
16282                .len(),
16283            frame.index().len()
16284        );
16285        assert_eq!(
16286            super::read_parquet(&parquet_path)
16287                .expect("read parquet")
16288                .index()
16289                .len(),
16290            frame.index().len()
16291        );
16292
16293        std::fs::remove_file(&excel_path).ok();
16294        std::fs::remove_file(&feather_path).ok();
16295        std::fs::remove_file(&parquet_path).ok();
16296    }
16297
16298    #[test]
16299    fn dataframe_io_ext_rjs51_in_memory_methods_match_free_functions() {
16300        use super::DataFrameIoExt;
16301
16302        let frame = make_test_dataframe();
16303        let csv_options = CsvWriteOptions {
16304            delimiter: b';',
16305            na_rep: "<NA>".to_owned(),
16306            header: true,
16307            include_index: true,
16308            index_label: Some("row".to_owned()),
16309        };
16310        assert_eq!(
16311            frame
16312                .to_csv_string_with_options(&csv_options)
16313                .expect("csv options through extension"),
16314            write_csv_string_with_options(&frame, &csv_options).expect("csv options free fn")
16315        );
16316        assert_eq!(
16317            frame
16318                .to_json_string(JsonOrient::Split)
16319                .expect("json split through extension"),
16320            write_json_string(&frame, JsonOrient::Split).expect("json split free fn")
16321        );
16322        assert_eq!(
16323            frame.to_jsonl_string().expect("jsonl through extension"),
16324            write_jsonl_string(&frame).expect("jsonl free fn")
16325        );
16326        let html_options = HtmlWriteOptions {
16327            include_index: false,
16328            ..HtmlWriteOptions::default()
16329        };
16330        assert_eq!(
16331            frame
16332                .to_html_string_with_options(&html_options)
16333                .expect("html options through extension"),
16334            write_html_string_with_options(&frame, &html_options).expect("html options free fn")
16335        );
16336        let xml_options = XmlWriteOptions {
16337            include_index: false,
16338            root_name: "records".to_owned(),
16339            row_name: "record".to_owned(),
16340            index_label: None,
16341        };
16342        assert_eq!(
16343            frame
16344                .to_xml_string_with_options(&xml_options)
16345                .expect("xml options through extension"),
16346            write_xml_string_with_options(&frame, &xml_options).expect("xml options free fn")
16347        );
16348
16349        let parquet = frame
16350            .to_parquet_bytes()
16351            .expect("parquet bytes through extension");
16352        assert_eq!(
16353            read_parquet_bytes(&parquet)
16354                .expect("parquet roundtrip")
16355                .index()
16356                .len(),
16357            frame.index().len()
16358        );
16359        let orc = frame.to_orc_bytes().expect("orc bytes through extension");
16360        assert_eq!(
16361            read_orc_bytes(&orc).expect("orc roundtrip").index().len(),
16362            frame.index().len()
16363        );
16364        let feather = frame
16365            .to_feather_bytes()
16366            .expect("feather bytes through extension");
16367        assert_eq!(
16368            read_feather_bytes(&feather)
16369                .expect("feather roundtrip")
16370                .index()
16371                .len(),
16372            frame.index().len()
16373        );
16374        let excel = frame
16375            .to_excel_bytes()
16376            .expect("excel bytes through extension");
16377        assert_eq!(
16378            read_excel_bytes(&excel, &ExcelReadOptions::default())
16379                .expect("excel roundtrip")
16380                .index()
16381                .len(),
16382            frame.index().len()
16383        );
16384    }
16385
16386    fn make_row_multiindex_test_dataframe() -> DataFrame {
16387        let df = DataFrame::from_dict(
16388            &["region", "product", "year", "sales", "cost"],
16389            vec![
16390                (
16391                    "region",
16392                    vec![
16393                        Scalar::Utf8("north".into()),
16394                        Scalar::Utf8("north".into()),
16395                        Scalar::Utf8("south".into()),
16396                    ],
16397                ),
16398                (
16399                    "product",
16400                    vec![
16401                        Scalar::Utf8("apple".into()),
16402                        Scalar::Utf8("pear".into()),
16403                        Scalar::Utf8("apple".into()),
16404                    ],
16405                ),
16406                (
16407                    "year",
16408                    vec![
16409                        Scalar::Int64(2023),
16410                        Scalar::Int64(2024),
16411                        Scalar::Int64(2023),
16412                    ],
16413                ),
16414                (
16415                    "sales",
16416                    vec![Scalar::Int64(10), Scalar::Int64(20), Scalar::Int64(30)],
16417                ),
16418                (
16419                    "cost",
16420                    vec![Scalar::Int64(4), Scalar::Int64(7), Scalar::Int64(12)],
16421                ),
16422            ],
16423        )
16424        .unwrap();
16425        df.set_index_multi(&["region", "product", "year"], true, "|")
16426            .unwrap()
16427    }
16428
16429    #[test]
16430    fn parquet_bytes_roundtrip() {
16431        let frame = make_test_dataframe();
16432        let bytes = super::write_parquet_bytes(&frame).expect("write parquet");
16433        assert!(!bytes.is_empty());
16434
16435        let frame2 = super::read_parquet_bytes(&bytes).expect("read parquet");
16436        assert_eq!(frame2.index().len(), 3);
16437        assert_eq!(
16438            frame2
16439                .column_names()
16440                .iter()
16441                .map(|s| s.as_str())
16442                .collect::<Vec<_>>(),
16443            vec!["ints", "floats", "names"]
16444        );
16445
16446        // Check values round-tripped correctly
16447        let ints = frame2.column("ints").unwrap();
16448        assert_eq!(ints.values()[0], Scalar::Int64(10));
16449        assert_eq!(ints.values()[1], Scalar::Int64(20));
16450        assert_eq!(ints.values()[2], Scalar::Int64(30));
16451
16452        let floats = frame2.column("floats").unwrap();
16453        assert_eq!(floats.values()[0], Scalar::Float64(1.5));
16454        assert_eq!(floats.values()[1], Scalar::Float64(2.5));
16455        assert_eq!(floats.values()[2], Scalar::Float64(3.5));
16456
16457        let names = frame2.column("names").unwrap();
16458        assert_eq!(names.values()[0], Scalar::Utf8("alice".into()));
16459        assert_eq!(names.values()[1], Scalar::Utf8("bob".into()));
16460        assert_eq!(names.values()[2], Scalar::Utf8("carol".into()));
16461    }
16462
16463    #[test]
16464    fn parquet_row_multiindex_roundtrip_restores_logical_row_axis() {
16465        let frame = make_row_multiindex_test_dataframe();
16466        let bytes = super::write_parquet_bytes(&frame).expect("write parquet");
16467        let roundtrip = super::read_parquet_bytes(&bytes).expect("read parquet");
16468
16469        assert!(roundtrip.equals(&frame));
16470        assert!(roundtrip.column("__index_level_0__").is_none());
16471        assert_eq!(
16472            roundtrip
16473                .row_multiindex()
16474                .expect("row multiindex should be restored")
16475                .get_level_values(0)
16476                .unwrap()
16477                .labels(),
16478            frame
16479                .row_multiindex()
16480                .expect("source row multiindex")
16481                .get_level_values(0)
16482                .unwrap()
16483                .labels()
16484        );
16485    }
16486
16487    #[test]
16488    fn parquet_file_roundtrip() {
16489        let frame = make_test_dataframe();
16490        let dir = std::env::temp_dir();
16491        let path = dir.join("fp_io_test_parquet_roundtrip.parquet");
16492
16493        super::write_parquet(&frame, &path).expect("write parquet file");
16494        let frame2 = super::read_parquet(&path).expect("read parquet file");
16495        assert_eq!(frame2.index().len(), 3);
16496        assert_eq!(
16497            frame2.column("ints").unwrap().values()[0],
16498            Scalar::Int64(10)
16499        );
16500        std::fs::remove_file(&path).ok();
16501    }
16502
16503    #[test]
16504    fn parquet_with_nulls() {
16505        use fp_types::DType;
16506
16507        let mut columns = BTreeMap::new();
16508        columns.insert(
16509            "vals".to_string(),
16510            Column::new(
16511                DType::Float64,
16512                vec![
16513                    Scalar::Float64(1.0),
16514                    Scalar::Null(NullKind::NaN),
16515                    Scalar::Float64(3.0),
16516                ],
16517            )
16518            .unwrap(),
16519        );
16520        columns.insert(
16521            "strs".to_string(),
16522            Column::from_values(vec![
16523                Scalar::Utf8("a".into()),
16524                Scalar::Null(NullKind::Null),
16525                Scalar::Utf8("c".into()),
16526            ])
16527            .unwrap(),
16528        );
16529
16530        let labels = vec![
16531            IndexLabel::Int64(0),
16532            IndexLabel::Int64(1),
16533            IndexLabel::Int64(2),
16534        ];
16535        let frame = DataFrame::new_with_column_order(
16536            Index::new(labels),
16537            columns,
16538            vec!["vals".to_string(), "strs".to_string()],
16539        )
16540        .unwrap();
16541
16542        let bytes = super::write_parquet_bytes(&frame).expect("write");
16543        let frame2 = super::read_parquet_bytes(&bytes).expect("read");
16544
16545        assert_eq!(
16546            frame2.column("vals").unwrap().values()[0],
16547            Scalar::Float64(1.0)
16548        );
16549        assert!(frame2.column("vals").unwrap().values()[1].is_missing());
16550        assert_eq!(
16551            frame2.column("vals").unwrap().values()[2],
16552            Scalar::Float64(3.0)
16553        );
16554
16555        assert_eq!(
16556            frame2.column("strs").unwrap().values()[0],
16557            Scalar::Utf8("a".into())
16558        );
16559        assert!(frame2.column("strs").unwrap().values()[1].is_missing());
16560        assert_eq!(
16561            frame2.column("strs").unwrap().values()[2],
16562            Scalar::Utf8("c".into())
16563        );
16564    }
16565
16566    #[test]
16567    fn parquet_bool_column() {
16568        use fp_types::DType;
16569
16570        let mut columns = BTreeMap::new();
16571        columns.insert(
16572            "flags".to_string(),
16573            Column::new(
16574                DType::Bool,
16575                vec![Scalar::Bool(true), Scalar::Bool(false), Scalar::Bool(true)],
16576            )
16577            .unwrap(),
16578        );
16579
16580        let labels = vec![
16581            IndexLabel::Int64(0),
16582            IndexLabel::Int64(1),
16583            IndexLabel::Int64(2),
16584        ];
16585        let frame = DataFrame::new_with_column_order(
16586            Index::new(labels),
16587            columns,
16588            vec!["flags".to_string()],
16589        )
16590        .unwrap();
16591
16592        let bytes = super::write_parquet_bytes(&frame).expect("write");
16593        let frame2 = super::read_parquet_bytes(&bytes).expect("read");
16594
16595        assert_eq!(
16596            frame2.column("flags").unwrap().values()[0],
16597            Scalar::Bool(true)
16598        );
16599        assert_eq!(
16600            frame2.column("flags").unwrap().values()[1],
16601            Scalar::Bool(false)
16602        );
16603        assert_eq!(
16604            frame2.column("flags").unwrap().values()[2],
16605            Scalar::Bool(true)
16606        );
16607    }
16608
16609    #[test]
16610    fn parquet_empty_dataframe_errors() {
16611        // Parquet format requires at least one column — empty DataFrames
16612        // cannot be represented, matching pandas behavior where
16613        // pd.DataFrame().to_parquet() also fails.
16614        let frame =
16615            DataFrame::new_with_column_order(Index::new(vec![]), BTreeMap::new(), vec![]).unwrap();
16616
16617        let result = super::write_parquet_bytes(&frame);
16618        assert!(result.is_err());
16619    }
16620
16621    #[test]
16622    fn orc_bytes_roundtrip_preserves_supported_columns() {
16623        let frame = make_test_dataframe();
16624        let bytes = write_orc_bytes(&frame).expect("write orc");
16625        assert!(bytes.starts_with(b"ORC"));
16626
16627        let frame2 = read_orc_bytes(&bytes).expect("read orc");
16628        assert_eq!(frame2.index().len(), 3);
16629        assert_eq!(
16630            frame2
16631                .column_names()
16632                .iter()
16633                .map(|s| s.as_str())
16634                .collect::<Vec<_>>(),
16635            vec!["ints", "floats", "names"]
16636        );
16637
16638        assert_eq!(
16639            frame2.column("ints").unwrap().values()[0],
16640            Scalar::Int64(10)
16641        );
16642        assert_eq!(
16643            frame2.column("floats").unwrap().values()[1],
16644            Scalar::Float64(2.5)
16645        );
16646        assert_eq!(
16647            frame2.column("names").unwrap().values()[2],
16648            Scalar::Utf8("carol".into())
16649        );
16650    }
16651
16652    #[test]
16653    fn orc_file_and_extension_aliases_roundtrip() {
16654        use super::DataFrameIoExt;
16655
16656        let frame = make_test_dataframe();
16657        let free_path = std::env::temp_dir().join(format!(
16658            "fp_io_orc_free_{}_{}.orc",
16659            std::process::id(),
16660            line!()
16661        ));
16662        let trait_path = std::env::temp_dir().join(format!(
16663            "fp_io_orc_trait_{}_{}.orc",
16664            std::process::id(),
16665            line!()
16666        ));
16667
16668        write_orc(&frame, &free_path).expect("write orc path");
16669        let free_roundtrip = read_orc(&free_path).expect("read orc path");
16670        assert!(free_roundtrip.equals(&frame));
16671
16672        frame.to_orc_file(&trait_path).expect("trait orc path");
16673        let trait_roundtrip = read_orc(&trait_path).expect("read trait orc path");
16674        assert!(trait_roundtrip.equals(&frame));
16675
16676        let bytes = frame.to_orc_bytes().expect("trait orc bytes");
16677        assert!(
16678            read_orc_bytes(&bytes)
16679                .expect("read trait orc bytes")
16680                .equals(&frame)
16681        );
16682    }
16683
16684    #[test]
16685    fn orc_row_multiindex_roundtrip_restores_logical_row_axis() {
16686        let frame = make_row_multiindex_test_dataframe();
16687        let bytes = write_orc_bytes(&frame).expect("write orc");
16688        let roundtrip = read_orc_bytes(&bytes).expect("read orc");
16689
16690        assert!(roundtrip.equals(&frame));
16691        assert!(roundtrip.column("__index_level_0__").is_none());
16692        assert_eq!(
16693            roundtrip
16694                .row_multiindex()
16695                .expect("row multiindex should be restored")
16696                .get_level_values(0)
16697                .unwrap()
16698                .labels(),
16699            frame
16700                .row_multiindex()
16701                .expect("source row multiindex")
16702                .get_level_values(0)
16703                .unwrap()
16704                .labels()
16705        );
16706    }
16707
16708    #[test]
16709    fn orc_reader_rejects_malformed_input() {
16710        let err = read_orc_bytes(b"not an orc file").expect_err("malformed orc should fail");
16711        assert!(matches!(err, IoError::Orc(_)));
16712    }
16713
16714    // ── Excel I/O tests ──────────────────────────────────────────────
16715
16716    #[test]
16717    fn write_excel_with_options_custom_sheet_name_survives_round_trip() {
16718        let frame = make_test_dataframe();
16719        let bytes = super::write_excel_bytes_with_options(
16720            &frame,
16721            &super::ExcelWriteOptions {
16722                sheet_name: "Results".to_string(),
16723                ..super::ExcelWriteOptions::default()
16724            },
16725        )
16726        .expect("write");
16727        let sheets =
16728            super::read_excel_sheets_bytes(&bytes, None, &super::ExcelReadOptions::default())
16729                .expect("read");
16730        assert_eq!(sheets.len(), 1);
16731        assert!(sheets.contains_key("Results"));
16732    }
16733
16734    #[test]
16735    fn write_excel_with_options_index_false_omits_index_column() {
16736        let frame = make_test_dataframe();
16737        let bytes = super::write_excel_bytes_with_options(
16738            &frame,
16739            &super::ExcelWriteOptions {
16740                index: false,
16741                ..super::ExcelWriteOptions::default()
16742            },
16743        )
16744        .expect("write");
16745        let frame2 =
16746            super::read_excel_bytes(&bytes, &super::ExcelReadOptions::default()).expect("read");
16747        // With index=false the first column is "ints" directly (no
16748        // anonymous leading index column).
16749        let names = frame2.column_names();
16750        assert_eq!(
16751            names.iter().map(|s| s.as_str()).collect::<Vec<_>>(),
16752            vec!["ints", "floats", "names"]
16753        );
16754    }
16755
16756    #[test]
16757    fn write_excel_with_options_index_label_overrides_header() {
16758        let frame = make_test_dataframe();
16759        let bytes = super::write_excel_bytes_with_options(
16760            &frame,
16761            &super::ExcelWriteOptions {
16762                index_label: Some("row_id".to_string()),
16763                ..super::ExcelWriteOptions::default()
16764            },
16765        )
16766        .expect("write");
16767        let frame2 =
16768            super::read_excel_bytes(&bytes, &super::ExcelReadOptions::default()).expect("read");
16769        // The index column now shows up as "row_id" before the data columns.
16770        let names = frame2.column_names();
16771        assert_eq!(names[0], "row_id");
16772    }
16773
16774    #[test]
16775    fn write_excel_with_options_header_false_omits_header_row() {
16776        let frame = make_test_dataframe();
16777        let bytes = super::write_excel_bytes_with_options(
16778            &frame,
16779            &super::ExcelWriteOptions {
16780                header: false,
16781                index: false,
16782                ..super::ExcelWriteOptions::default()
16783            },
16784        )
16785        .expect("write");
16786        // Without header, the reader treats row 0 as headers. We
16787        // expect the first data row to become the column names
16788        // instead of literal "ints"/"floats"/"names".
16789        let frame2 =
16790            super::read_excel_bytes(&bytes, &super::ExcelReadOptions::default()).expect("read");
16791        let names = frame2.column_names();
16792        let name_strs: Vec<&str> = names.iter().map(|s| s.as_str()).collect();
16793        assert!(!name_strs.contains(&"ints"));
16794    }
16795
16796    #[test]
16797    fn write_excel_with_options_default_matches_write_excel_bytes() {
16798        let frame = make_test_dataframe();
16799        let default_bytes = super::write_excel_bytes(&frame).expect("default");
16800        let options_bytes =
16801            super::write_excel_bytes_with_options(&frame, &super::ExcelWriteOptions::default())
16802                .expect("options");
16803        assert_eq!(default_bytes, options_bytes);
16804    }
16805
16806    #[test]
16807    fn excel_multiindex_roundtrip_with_explicit_index_cols() {
16808        let frame = make_row_multiindex_test_dataframe();
16809        let bytes =
16810            super::write_excel_bytes_with_options(&frame, &super::ExcelWriteOptions::default())
16811                .expect("write");
16812        let roundtrip = super::read_excel_bytes_with_index_cols(
16813            &bytes,
16814            &super::ExcelReadOptions::default(),
16815            &["region", "product", "year"],
16816        )
16817        .expect("read");
16818
16819        assert!(roundtrip.equals(&frame));
16820        assert_eq!(roundtrip.row_multiindex(), frame.row_multiindex());
16821    }
16822
16823    fn build_two_sheet_workbook_bytes() -> Vec<u8> {
16824        use rust_xlsxwriter::Workbook;
16825        let mut workbook = Workbook::new();
16826        let sheet1 = workbook.add_worksheet();
16827        sheet1.set_name("Alpha").expect("sheet name");
16828        sheet1.write_string(0, 0, "a").expect("header");
16829        sheet1.write_string(0, 1, "b").expect("header");
16830        sheet1.write_number(1, 0, 1.0).expect("data");
16831        sheet1.write_number(1, 1, 10.0).expect("data");
16832        sheet1.write_number(2, 0, 2.0).expect("data");
16833        sheet1.write_number(2, 1, 20.0).expect("data");
16834
16835        let sheet2 = workbook.add_worksheet();
16836        sheet2.set_name("Bravo").expect("sheet name");
16837        sheet2.write_string(0, 0, "name").expect("header");
16838        sheet2.write_string(1, 0, "alice").expect("data");
16839        sheet2.write_string(2, 0, "bob").expect("data");
16840
16841        let sheet3 = workbook.add_worksheet();
16842        sheet3.set_name("Charlie").expect("sheet name");
16843        sheet3.write_string(0, 0, "x").expect("header");
16844        sheet3.write_number(1, 0, 99.0).expect("data");
16845
16846        workbook.save_to_buffer().expect("save")
16847    }
16848
16849    #[test]
16850    fn read_excel_sheets_ordered_bytes_preserves_workbook_order() {
16851        // Workbook sheet order: Alpha, Bravo, Charlie. A sorted map
16852        // would still give Alpha/Bravo/Charlie alphabetically — but
16853        // pandas guarantees workbook order regardless of alphabetic
16854        // relationship, so this test uses a fixture where the ordered
16855        // result differs from sorted order.
16856        use rust_xlsxwriter::Workbook;
16857        let mut workbook = Workbook::new();
16858        let s1 = workbook.add_worksheet();
16859        s1.set_name("Zulu").expect("name");
16860        s1.write_string(0, 0, "v").expect("header");
16861        s1.write_number(1, 0, 1.0).expect("data");
16862        let s2 = workbook.add_worksheet();
16863        s2.set_name("Alpha").expect("name");
16864        s2.write_string(0, 0, "v").expect("header");
16865        s2.write_number(1, 0, 2.0).expect("data");
16866        let s3 = workbook.add_worksheet();
16867        s3.set_name("Mike").expect("name");
16868        s3.write_string(0, 0, "v").expect("header");
16869        s3.write_number(1, 0, 3.0).expect("data");
16870        let bytes = workbook.save_to_buffer().expect("save");
16871
16872        let ordered = super::read_excel_sheets_ordered_bytes(
16873            &bytes,
16874            None,
16875            &super::ExcelReadOptions::default(),
16876        )
16877        .expect("read ordered");
16878        assert_eq!(
16879            ordered.iter().map(|(k, _)| k.as_str()).collect::<Vec<_>>(),
16880            vec!["Zulu", "Alpha", "Mike"],
16881            "ordered form preserves workbook order"
16882        );
16883
16884        // Sorted form alphabetizes (existing contract for BTreeMap).
16885        let sorted =
16886            super::read_excel_sheets_bytes(&bytes, None, &super::ExcelReadOptions::default())
16887                .expect("read sorted");
16888        assert_eq!(
16889            sorted.keys().map(String::as_str).collect::<Vec<_>>(),
16890            vec!["Alpha", "Mike", "Zulu"],
16891            "BTreeMap form alphabetizes"
16892        );
16893    }
16894
16895    #[test]
16896    fn read_excel_sheets_ordered_bytes_selected_subset_keeps_caller_order() {
16897        let bytes = build_two_sheet_workbook_bytes();
16898        // Caller-specified order: Charlie, Alpha — deliberately reversed
16899        // from workbook order. Pandas docs say sheet_name=[list] returns
16900        // a dict whose iteration reflects the argument order; we match.
16901        let req = vec!["Charlie".to_string(), "Alpha".to_string()];
16902        let ordered = super::read_excel_sheets_ordered_bytes(
16903            &bytes,
16904            Some(&req),
16905            &super::ExcelReadOptions::default(),
16906        )
16907        .expect("ordered subset");
16908        assert_eq!(
16909            ordered.iter().map(|(k, _)| k.as_str()).collect::<Vec<_>>(),
16910            vec!["Charlie", "Alpha"]
16911        );
16912    }
16913
16914    #[test]
16915    fn read_excel_sheets_ordered_path_matches_bytes() {
16916        let bytes = build_two_sheet_workbook_bytes();
16917        let temp = std::env::temp_dir().join("fp_io_wrt3_ordered.xlsx");
16918        std::fs::write(&temp, &bytes).expect("write temp");
16919        let via_path =
16920            super::read_excel_sheets_ordered(&temp, None, &super::ExcelReadOptions::default())
16921                .expect("read path");
16922        let via_bytes = super::read_excel_sheets_ordered_bytes(
16923            &bytes,
16924            None,
16925            &super::ExcelReadOptions::default(),
16926        )
16927        .expect("read bytes");
16928        assert_eq!(
16929            via_path.iter().map(|(k, _)| k.clone()).collect::<Vec<_>>(),
16930            via_bytes.iter().map(|(k, _)| k.clone()).collect::<Vec<_>>()
16931        );
16932    }
16933
16934    #[test]
16935    fn read_excel_sheets_bytes_all_sheets_returns_map() {
16936        let bytes = build_two_sheet_workbook_bytes();
16937        let sheets =
16938            super::read_excel_sheets_bytes(&bytes, None, &super::ExcelReadOptions::default())
16939                .expect("read sheets");
16940        assert_eq!(sheets.len(), 3);
16941        assert!(sheets.contains_key("Alpha"));
16942        assert!(sheets.contains_key("Bravo"));
16943        assert!(sheets.contains_key("Charlie"));
16944
16945        let alpha = &sheets["Alpha"];
16946        assert_eq!(alpha.index().len(), 2);
16947        assert_eq!(alpha.column_names().len(), 2);
16948
16949        let bravo = &sheets["Bravo"];
16950        assert_eq!(bravo.index().len(), 2);
16951        assert_eq!(
16952            bravo.column("name").unwrap().values()[0],
16953            Scalar::Utf8("alice".into())
16954        );
16955    }
16956
16957    #[test]
16958    fn read_excel_sheets_bytes_selects_subset() {
16959        let bytes = build_two_sheet_workbook_bytes();
16960        let selected = vec!["Alpha".to_string(), "Charlie".to_string()];
16961        let sheets = super::read_excel_sheets_bytes(
16962            &bytes,
16963            Some(&selected),
16964            &super::ExcelReadOptions::default(),
16965        )
16966        .expect("read subset");
16967        assert_eq!(sheets.len(), 2);
16968        assert!(sheets.contains_key("Alpha"));
16969        assert!(sheets.contains_key("Charlie"));
16970        assert!(!sheets.contains_key("Bravo"));
16971    }
16972
16973    #[test]
16974    fn read_excel_sheets_bytes_unknown_sheet_errors() {
16975        let bytes = build_two_sheet_workbook_bytes();
16976        let bogus = vec!["Zeta".to_string()];
16977        let err = super::read_excel_sheets_bytes(
16978            &bytes,
16979            Some(&bogus),
16980            &super::ExcelReadOptions::default(),
16981        )
16982        .unwrap_err();
16983        assert!(matches!(err, super::IoError::Excel(_)));
16984    }
16985
16986    #[test]
16987    fn read_excel_sheets_path_matches_bytes() {
16988        let bytes = build_two_sheet_workbook_bytes();
16989        let temp = std::env::temp_dir().join("fp_io_9my2_multisheet.xlsx");
16990        std::fs::write(&temp, &bytes).expect("write temp");
16991        let via_path = super::read_excel_sheets(&temp, None, &super::ExcelReadOptions::default())
16992            .expect("read path");
16993        let via_bytes =
16994            super::read_excel_sheets_bytes(&bytes, None, &super::ExcelReadOptions::default())
16995                .expect("read bytes");
16996        assert_eq!(
16997            via_path.keys().collect::<Vec<_>>(),
16998            via_bytes.keys().collect::<Vec<_>>()
16999        );
17000    }
17001
17002    #[test]
17003    fn excel_bytes_roundtrip() {
17004        let frame = make_test_dataframe();
17005        let bytes = super::write_excel_bytes(&frame).expect("write excel");
17006        assert!(!bytes.is_empty());
17007
17008        let frame2 = super::read_excel_bytes(
17009            &bytes,
17010            &super::ExcelReadOptions {
17011                index_col: Some("column_0".into()),
17012                ..Default::default()
17013            },
17014        )
17015        .expect("read excel");
17016        assert_eq!(frame2.index().len(), 3);
17017        assert_eq!(frame2.index().labels(), frame.index().labels());
17018        assert_eq!(frame2.index().name(), None);
17019        // Excel preserves the write-time column order (ints, floats, names).
17020        assert_eq!(
17021            frame2
17022                .column_names()
17023                .iter()
17024                .map(|s| s.as_str())
17025                .collect::<Vec<_>>(),
17026            vec!["ints", "floats", "names"]
17027        );
17028
17029        // Int values survive round-trip (Excel stores as f64, we recover Int64).
17030        let ints = frame2.column("ints").unwrap();
17031        assert_eq!(ints.values()[0], Scalar::Int64(10));
17032        assert_eq!(ints.values()[1], Scalar::Int64(20));
17033        assert_eq!(ints.values()[2], Scalar::Int64(30));
17034
17035        // Float values survive round-trip.
17036        let floats = frame2.column("floats").unwrap();
17037        assert_eq!(floats.values()[0], Scalar::Float64(1.5));
17038        assert_eq!(floats.values()[1], Scalar::Float64(2.5));
17039        assert_eq!(floats.values()[2], Scalar::Float64(3.5));
17040
17041        // String values survive round-trip.
17042        let names = frame2.column("names").unwrap();
17043        assert_eq!(names.values()[0], Scalar::Utf8("alice".into()));
17044        assert_eq!(names.values()[1], Scalar::Utf8("bob".into()));
17045        assert_eq!(names.values()[2], Scalar::Utf8("carol".into()));
17046    }
17047
17048    #[test]
17049    fn excel_file_roundtrip() {
17050        let frame = make_test_dataframe();
17051        let dir = std::env::temp_dir();
17052        let path = dir.join("fp_io_test_excel_roundtrip.xlsx");
17053
17054        super::write_excel(&frame, &path).expect("write excel file");
17055        let frame2 = super::read_excel(
17056            &path,
17057            &super::ExcelReadOptions {
17058                index_col: Some("column_0".into()),
17059                ..Default::default()
17060            },
17061        )
17062        .expect("read excel file");
17063        assert_eq!(frame2.index().len(), 3);
17064        assert_eq!(frame2.index().labels(), frame.index().labels());
17065        assert_eq!(
17066            frame2.column("ints").unwrap().values()[0],
17067            Scalar::Int64(10)
17068        );
17069        std::fs::remove_file(&path).ok();
17070    }
17071
17072    #[test]
17073    fn excel_with_nulls() {
17074        use fp_types::DType;
17075
17076        let mut columns = BTreeMap::new();
17077        columns.insert(
17078            "vals".to_string(),
17079            Column::new(
17080                DType::Float64,
17081                vec![
17082                    Scalar::Float64(1.0),
17083                    Scalar::Null(NullKind::NaN),
17084                    Scalar::Float64(3.0),
17085                ],
17086            )
17087            .unwrap(),
17088        );
17089
17090        let labels = vec![
17091            IndexLabel::Int64(0),
17092            IndexLabel::Int64(1),
17093            IndexLabel::Int64(2),
17094        ];
17095        let frame =
17096            DataFrame::new_with_column_order(Index::new(labels), columns, vec!["vals".to_string()])
17097                .unwrap();
17098
17099        let bytes = super::write_excel_bytes(&frame).expect("write");
17100        let frame2 = super::read_excel_bytes(
17101            &bytes,
17102            &super::ExcelReadOptions {
17103                index_col: Some("column_0".into()),
17104                ..Default::default()
17105            },
17106        )
17107        .expect("read");
17108
17109        // Non-null values round-trip.
17110        assert_eq!(frame2.column("vals").unwrap().values()[0], Scalar::Int64(1));
17111        // NaN written as empty cell, read back as Null.
17112        assert!(frame2.column("vals").unwrap().values()[1].is_missing());
17113        assert_eq!(frame2.column("vals").unwrap().values()[2], Scalar::Int64(3));
17114    }
17115
17116    #[test]
17117    fn excel_bool_column() {
17118        use fp_types::DType;
17119
17120        let mut columns = BTreeMap::new();
17121        columns.insert(
17122            "flags".to_string(),
17123            Column::new(
17124                DType::Bool,
17125                vec![Scalar::Bool(true), Scalar::Bool(false), Scalar::Bool(true)],
17126            )
17127            .unwrap(),
17128        );
17129
17130        let labels = vec![
17131            IndexLabel::Int64(0),
17132            IndexLabel::Int64(1),
17133            IndexLabel::Int64(2),
17134        ];
17135        let frame = DataFrame::new_with_column_order(
17136            Index::new(labels),
17137            columns,
17138            vec!["flags".to_string()],
17139        )
17140        .unwrap();
17141
17142        let bytes = super::write_excel_bytes(&frame).expect("write");
17143        let frame2 = super::read_excel_bytes(
17144            &bytes,
17145            &super::ExcelReadOptions {
17146                index_col: Some("column_0".into()),
17147                ..Default::default()
17148            },
17149        )
17150        .expect("read");
17151
17152        assert_eq!(
17153            frame2.column("flags").unwrap().values()[0],
17154            Scalar::Bool(true)
17155        );
17156        assert_eq!(
17157            frame2.column("flags").unwrap().values()[1],
17158            Scalar::Bool(false)
17159        );
17160        assert_eq!(
17161            frame2.column("flags").unwrap().values()[2],
17162            Scalar::Bool(true)
17163        );
17164    }
17165
17166    #[test]
17167    fn excel_skip_rows() {
17168        // Build an xlsx with 5 data rows, then read with skip_rows=2 to skip
17169        // 2 rows before the header.
17170        use fp_types::DType;
17171
17172        let mut columns = BTreeMap::new();
17173        columns.insert(
17174            "x".to_string(),
17175            Column::new(DType::Int64, vec![Scalar::Int64(1), Scalar::Int64(2)]).unwrap(),
17176        );
17177        let labels = vec![IndexLabel::Int64(0), IndexLabel::Int64(1)];
17178        let frame =
17179            DataFrame::new_with_column_order(Index::new(labels), columns, vec!["x".to_string()])
17180                .unwrap();
17181
17182        let bytes = super::write_excel_bytes(&frame).expect("write");
17183        let frame2 = super::read_excel_bytes(
17184            &bytes,
17185            &super::ExcelReadOptions {
17186                skip_rows: 1,
17187                has_headers: false,
17188                ..Default::default()
17189            },
17190        )
17191        .expect("read with skip");
17192
17193        // Skipped the header row, so first data row becomes first row.
17194        // With has_headers=false, column names are auto-generated.
17195        assert_eq!(frame2.index().len(), 2);
17196        assert!(frame2.column("column_0").is_some());
17197    }
17198
17199    #[test]
17200    fn excel_header_none_with_explicit_names_uses_names_and_keeps_first_row() {
17201        let rows = vec![
17202            vec![
17203                calamine::Data::Int(1),
17204                calamine::Data::String("alpha".to_owned()),
17205            ],
17206            vec![
17207                calamine::Data::Int(2),
17208                calamine::Data::String("beta".to_owned()),
17209            ],
17210        ];
17211
17212        let frame = super::parse_excel_rows(
17213            rows,
17214            &super::ExcelReadOptions {
17215                has_headers: false,
17216                names: Some(vec!["id".to_owned(), "label".to_owned()]),
17217                ..Default::default()
17218            },
17219        )
17220        .expect("parse excel rows with explicit names");
17221
17222        assert_eq!(frame.column_names(), vec!["id", "label"]);
17223        assert_eq!(frame.index().len(), 2);
17224        assert_eq!(frame.column("id").unwrap().values()[0], Scalar::Int64(1));
17225        assert_eq!(
17226            frame.column("label").unwrap().values()[0],
17227            Scalar::Utf8("alpha".into())
17228        );
17229        assert_eq!(frame.column("id").unwrap().values()[1], Scalar::Int64(2));
17230    }
17231
17232    #[test]
17233    fn excel_header_none_with_explicit_names_preserves_index_name() {
17234        let rows = vec![
17235            vec![
17236                calamine::Data::Int(10),
17237                calamine::Data::String("alpha".to_owned()),
17238            ],
17239            vec![
17240                calamine::Data::Int(20),
17241                calamine::Data::String("beta".to_owned()),
17242            ],
17243        ];
17244
17245        let frame = super::parse_excel_rows(
17246            rows,
17247            &super::ExcelReadOptions {
17248                has_headers: false,
17249                names: Some(vec!["row_id".to_owned(), "value".to_owned()]),
17250                index_col: Some("row_id".to_owned()),
17251                ..Default::default()
17252            },
17253        )
17254        .expect("parse excel rows with named index column");
17255
17256        assert_eq!(frame.index().name(), Some("row_id"));
17257        assert_eq!(frame.index().labels()[0], IndexLabel::Int64(10));
17258        assert_eq!(frame.index().labels()[1], IndexLabel::Int64(20));
17259        assert!(frame.column("row_id").is_none());
17260        assert_eq!(
17261            frame.column("value").unwrap().values(),
17262            &[Scalar::Utf8("alpha".into()), Scalar::Utf8("beta".into())]
17263        );
17264    }
17265
17266    #[test]
17267    fn excel_explicit_names_width_mismatch_errors() {
17268        let rows = vec![vec![calamine::Data::Int(1), calamine::Data::Int(2)]];
17269
17270        let err = super::parse_excel_rows(
17271            rows,
17272            &super::ExcelReadOptions {
17273                has_headers: false,
17274                names: Some(vec!["only_one".to_owned()]),
17275                ..Default::default()
17276            },
17277        )
17278        .expect_err("names width mismatch should error");
17279
17280        assert!(
17281            matches!(err, IoError::Excel(message) if message.contains("expected 2 column names, got 1"))
17282        );
17283    }
17284
17285    #[test]
17286    fn excel_usecols_selects_subset_in_sheet_order() {
17287        let rows = vec![
17288            vec![
17289                calamine::Data::String("a".to_owned()),
17290                calamine::Data::String("b".to_owned()),
17291                calamine::Data::String("c".to_owned()),
17292            ],
17293            vec![
17294                calamine::Data::Int(1),
17295                calamine::Data::Int(2),
17296                calamine::Data::Int(3),
17297            ],
17298        ];
17299
17300        let frame = super::parse_excel_rows(
17301            rows,
17302            &super::ExcelReadOptions {
17303                usecols: Some(vec!["c".to_owned(), "a".to_owned()]),
17304                ..Default::default()
17305            },
17306        )
17307        .expect("parse excel rows with usecols");
17308
17309        assert_eq!(frame.column_names(), vec!["a", "c"]);
17310        assert_eq!(frame.column("a").unwrap().values(), &[Scalar::Int64(1)]);
17311        assert_eq!(frame.column("c").unwrap().values(), &[Scalar::Int64(3)]);
17312        assert!(frame.column("b").is_none());
17313    }
17314
17315    #[test]
17316    fn excel_usecols_with_explicit_names_filters_renamed_columns() {
17317        let rows = vec![
17318            vec![
17319                calamine::Data::Int(1),
17320                calamine::Data::String("alpha".to_owned()),
17321            ],
17322            vec![
17323                calamine::Data::Int(2),
17324                calamine::Data::String("beta".to_owned()),
17325            ],
17326        ];
17327
17328        let frame = super::parse_excel_rows(
17329            rows,
17330            &super::ExcelReadOptions {
17331                has_headers: false,
17332                names: Some(vec!["id".to_owned(), "label".to_owned()]),
17333                usecols: Some(vec!["label".to_owned()]),
17334                ..Default::default()
17335            },
17336        )
17337        .expect("parse headerless excel rows with names and usecols");
17338
17339        assert_eq!(frame.column_names(), vec!["label"]);
17340        assert_eq!(
17341            frame.column("label").unwrap().values(),
17342            &[Scalar::Utf8("alpha".into()), Scalar::Utf8("beta".into())]
17343        );
17344        assert!(frame.column("id").is_none());
17345    }
17346
17347    #[test]
17348    fn excel_usecols_missing_column_errors() {
17349        let rows = vec![
17350            vec![
17351                calamine::Data::String("a".to_owned()),
17352                calamine::Data::String("b".to_owned()),
17353            ],
17354            vec![calamine::Data::Int(1), calamine::Data::Int(2)],
17355        ];
17356
17357        let err = super::parse_excel_rows(
17358            rows,
17359            &super::ExcelReadOptions {
17360                usecols: Some(vec!["missing".to_owned()]),
17361                ..Default::default()
17362            },
17363        )
17364        .expect_err("missing excel usecols should error");
17365
17366        assert!(
17367            matches!(err, IoError::MissingUsecols(missing) if missing == vec!["missing".to_owned()])
17368        );
17369    }
17370
17371    #[test]
17372    fn excel_default_read_promotes_writer_range_index_back_to_index() {
17373        let frame = make_test_dataframe();
17374        let bytes = super::write_excel_bytes(&frame).expect("write excel");
17375
17376        let frame2 = super::read_excel_bytes(&bytes, &super::ExcelReadOptions::default())
17377            .expect("read excel");
17378
17379        assert_eq!(frame2.index().labels(), frame.index().labels());
17380        assert_eq!(frame2.index().name(), None);
17381        assert_eq!(frame2.column_names(), vec!["ints", "floats", "names"],);
17382        assert!(frame2.column("column_0").is_none());
17383    }
17384
17385    #[test]
17386    fn excel_default_read_keeps_non_range_generated_leading_column_as_data() {
17387        let rows = vec![
17388            vec![
17389                calamine::Data::Empty,
17390                calamine::Data::String("value".to_owned()),
17391            ],
17392            vec![calamine::Data::Int(10), calamine::Data::Int(1)],
17393            vec![calamine::Data::Int(20), calamine::Data::Int(2)],
17394        ];
17395
17396        let frame = super::parse_excel_rows(rows, &super::ExcelReadOptions::default())
17397            .expect("parse excel rows");
17398
17399        assert_eq!(
17400            frame.index().labels(),
17401            &[IndexLabel::Int64(0), IndexLabel::Int64(1)]
17402        );
17403        assert_eq!(frame.column_names(), vec!["column_0", "value"]);
17404        assert_eq!(
17405            frame.column("column_0").unwrap().values(),
17406            &[Scalar::Int64(10), Scalar::Int64(20)],
17407        );
17408    }
17409
17410    #[test]
17411    fn excel_named_index_roundtrip_preserves_index_name() {
17412        use fp_types::DType;
17413
17414        let mut columns = BTreeMap::new();
17415        columns.insert(
17416            "vals".to_string(),
17417            Column::new(DType::Int64, vec![Scalar::Int64(10), Scalar::Int64(20)]).unwrap(),
17418        );
17419
17420        let frame = DataFrame::new_with_column_order(
17421            Index::new(vec![IndexLabel::Int64(10), IndexLabel::Int64(20)]).set_name("row_id"),
17422            columns,
17423            vec!["vals".to_string()],
17424        )
17425        .unwrap();
17426
17427        let bytes = super::write_excel_bytes(&frame).expect("write excel");
17428        let frame2 = super::read_excel_bytes(
17429            &bytes,
17430            &super::ExcelReadOptions {
17431                index_col: Some("row_id".into()),
17432                ..Default::default()
17433            },
17434        )
17435        .expect("read excel");
17436
17437        assert_eq!(frame2.index().labels(), frame.index().labels());
17438        assert_eq!(frame2.index().name(), Some("row_id"));
17439        assert!(frame2.column("row_id").is_none());
17440        assert_eq!(
17441            frame2.column("vals").unwrap().values(),
17442            &[Scalar::Int64(10), Scalar::Int64(20)]
17443        );
17444    }
17445
17446    #[test]
17447    fn excel_duplicate_headers_error() {
17448        let rows = vec![
17449            vec![
17450                calamine::Data::String("dup".to_owned()),
17451                calamine::Data::String("dup".to_owned()),
17452            ],
17453            vec![calamine::Data::Int(1), calamine::Data::Int(2)],
17454        ];
17455
17456        let err = super::parse_excel_rows(rows, &super::ExcelReadOptions::default())
17457            .expect_err("duplicate headers should error");
17458        assert!(matches!(err, IoError::DuplicateColumnName(_)));
17459    }
17460
17461    // ── SQL I/O tests ──────────────────────────────────────────────
17462    //
    // Per br-frankenpandas-7a49 (fd90.48): the first import group below
    // is unconditional so stub-backend tests (which only need types and
    // free fns, not rusqlite) compile under --no-default-features. It
    // carries the TYPE imports, the introspection-helper free fns, and
    // write_sql / write_sql_with_options, all of which are used by both
    // stub-backend tests and SQLite-backed tests.
    //
    // The read_sql_* row-materialization free fns are only exercised
    // inside SQLite-backed tests (cfg-gated below), so they get their
    // own `#[cfg(feature = "sql-sqlite")]` import group to avoid unused
    // warnings.
17480    use super::{
17481        SqlBackendCaps, SqlColumnSchema, SqlForeignKeySchema, SqlIfExists, SqlIndexSchema,
17482        SqlInsertMethod, SqlInspector, SqlQueryResult, SqlReadOptions, SqlReflectedTable,
17483        SqlTableSchema, SqlUniqueConstraintSchema, SqlWriteOptions, list_sql_foreign_keys,
17484        list_sql_indexes, list_sql_schemas, list_sql_tables, list_sql_unique_constraints,
17485        list_sql_views, sql_backend_caps, sql_max_identifier_length, sql_max_insert_rows,
17486        sql_max_param_count, sql_primary_key_columns, sql_server_version, sql_supports_returning,
17487        sql_supports_schemas, sql_table_comment, sql_table_schema, truncate_sql_table, write_sql,
17488        write_sql_with_options,
17489    };
17490    #[cfg(feature = "sql-sqlite")]
17491    use super::{
17492        read_sql, read_sql_chunks, read_sql_chunks_with_index_col, read_sql_chunks_with_options,
17493        read_sql_chunks_with_options_and_index_col, read_sql_query, read_sql_query_chunks,
17494        read_sql_query_chunks_with_index_col, read_sql_query_chunks_with_options,
17495        read_sql_query_chunks_with_options_and_index_col, read_sql_query_with_index_col,
17496        read_sql_query_with_options, read_sql_query_with_options_and_index_col, read_sql_table,
17497        read_sql_table_chunks, read_sql_table_chunks_with_index_col,
17498        read_sql_table_chunks_with_options, read_sql_table_chunks_with_options_and_index_col,
17499        read_sql_table_columns, read_sql_table_columns_chunks,
17500        read_sql_table_columns_chunks_with_index_col, read_sql_table_columns_with_index_col,
17501        read_sql_table_with_index_col, read_sql_table_with_options,
17502        read_sql_table_with_options_and_index_col, read_sql_with_index_col, read_sql_with_options,
17503    };
17504
17505    // Per br-frankenpandas-7a49 (fd90.48): the helper itself only
17506    // exists when sql-sqlite is on, since it directly references
17507    // rusqlite::Connection. All tests that call this are also
17508    // cfg-gated on the same feature.
17509    #[cfg(feature = "sql-sqlite")]
17510    fn make_sql_test_conn() -> rusqlite::Connection {
17511        rusqlite::Connection::open_in_memory().expect("in-memory sqlite")
17512    }
17513
17514    #[cfg(feature = "sql-sqlite")]
17515    #[test]
17516    fn sql_read_with_index_col_promotes_named_column() {
17517        let frame = make_test_dataframe();
17518        let conn = make_sql_test_conn();
17519        write_sql(&frame, &conn, "indexed_tbl", SqlIfExists::Fail).expect("write");
17520
17521        // Promote the "ints" column to the row index. The data
17522        // columns should drop ints and the index labels should be the
17523        // ints values.
17524        let result = read_sql_table_with_index_col(&conn, "indexed_tbl", Some("ints"))
17525            .expect("read with index");
17526        assert_eq!(result.index().name(), Some("ints"));
17527        assert_eq!(result.index().labels()[0], crate::IndexLabel::Int64(10));
17528        assert_eq!(result.index().labels()[1], crate::IndexLabel::Int64(20));
17529        assert_eq!(result.index().labels()[2], crate::IndexLabel::Int64(30));
17530        // Data columns: only the non-index columns remain.
17531        let names: Vec<&str> = result.column_names().iter().map(|s| s.as_str()).collect();
17532        assert!(!names.contains(&"ints"));
17533        assert!(names.contains(&"floats"));
17534        assert!(names.contains(&"names"));
17535    }
17536
17537    #[cfg(feature = "sql-sqlite")]
17538    #[test]
17539    fn sql_read_with_index_col_none_is_unchanged() {
17540        let frame = make_test_dataframe();
17541        let conn = make_sql_test_conn();
17542        write_sql(&frame, &conn, "noindex_tbl", SqlIfExists::Fail).expect("write");
17543        let baseline = read_sql_table(&conn, "noindex_tbl").expect("baseline");
17544        let result =
17545            read_sql_table_with_index_col(&conn, "noindex_tbl", None).expect("noop variant");
17546        assert_eq!(result.index().labels(), baseline.index().labels());
17547        assert_eq!(result.column_names(), baseline.column_names());
17548    }
17549
17550    #[cfg(feature = "sql-sqlite")]
17551    #[test]
17552    fn sql_read_with_index_col_unknown_column_errors() {
17553        let frame = make_test_dataframe();
17554        let conn = make_sql_test_conn();
17555        write_sql(&frame, &conn, "missing_tbl", SqlIfExists::Fail).expect("write");
17556        let err = read_sql_table_with_index_col(&conn, "missing_tbl", Some("nope")).unwrap_err();
17557        assert!(matches!(err, crate::IoError::Sql(_)));
17558    }
17559
17560    #[cfg(feature = "sql-sqlite")]
17561    #[test]
17562    fn sql_read_table_columns_returns_requested_projection_in_order() {
17563        let frame = make_test_dataframe();
17564        let conn = make_sql_test_conn();
17565        write_sql(&frame, &conn, "proj_tbl", SqlIfExists::Fail).expect("write");
17566
17567        let result = read_sql_table_columns(&conn, "proj_tbl", &["names", "ints"])
17568            .expect("subset projection");
17569        let names: Vec<&str> = result.column_names().iter().map(|s| s.as_str()).collect();
17570        assert_eq!(names, vec!["names", "ints"]);
17571        assert_eq!(result.index().len(), 3);
17572        assert_eq!(
17573            result.column("ints").unwrap().values()[0],
17574            Scalar::Int64(10)
17575        );
17576        assert_eq!(
17577            result.column("names").unwrap().values()[2],
17578            Scalar::Utf8("carol".into())
17579        );
17580    }
17581
17582    #[cfg(feature = "sql-sqlite")]
17583    #[test]
17584    fn sql_read_table_columns_single_column_projection() {
17585        let frame = make_test_dataframe();
17586        let conn = make_sql_test_conn();
17587        write_sql(&frame, &conn, "single_tbl", SqlIfExists::Fail).expect("write");
17588
17589        let result =
17590            read_sql_table_columns(&conn, "single_tbl", &["floats"]).expect("single projection");
17591        let names: Vec<&str> = result.column_names().iter().map(|s| s.as_str()).collect();
17592        assert_eq!(names, vec!["floats"]);
17593        assert_eq!(
17594            result.column("floats").unwrap().values()[1],
17595            Scalar::Float64(2.5)
17596        );
17597    }
17598
17599    #[cfg(feature = "sql-sqlite")]
17600    #[test]
17601    fn sql_read_table_columns_rejects_empty_columns() {
17602        let conn = make_sql_test_conn();
17603        let err = read_sql_table_columns(&conn, "any_tbl", &[]).unwrap_err();
17604        assert!(matches!(err, crate::IoError::Sql(_)));
17605    }
17606
17607    #[cfg(feature = "sql-sqlite")]
17608    #[test]
17609    fn sql_read_table_columns_rejects_invalid_column_name() {
17610        let frame = make_test_dataframe();
17611        let conn = make_sql_test_conn();
17612        write_sql(&frame, &conn, "valid_tbl", SqlIfExists::Fail).expect("write");
17613        let err = read_sql_table_columns(&conn, "valid_tbl", &["ints; DROP TABLE valid_tbl"])
17614            .unwrap_err();
17615        assert!(matches!(err, crate::IoError::Sql(_)));
17616    }
17617
17618    #[cfg(feature = "sql-sqlite")]
17619    #[test]
17620    fn sql_read_table_columns_rejects_invalid_table_name() {
17621        let conn = make_sql_test_conn();
17622        let err = read_sql_table_columns(&conn, "bad table", &["ints"]).unwrap_err();
17623        assert!(matches!(err, crate::IoError::Sql(_)));
17624    }
17625
17626    #[cfg(feature = "sql-sqlite")]
17627    #[test]
17628    fn sql_read_table_columns_chunks_returns_requested_projection_in_order() {
17629        let frame = make_test_dataframe();
17630        let conn = make_sql_test_conn();
17631        write_sql(&frame, &conn, "proj_chunk_tbl", SqlIfExists::Fail).expect("write");
17632
17633        let chunks = read_sql_table_columns_chunks(&conn, "proj_chunk_tbl", &["names", "ints"], 2)
17634            .expect("projection chunk iterator")
17635            .collect::<Result<Vec<_>, _>>()
17636            .expect("all chunks");
17637
17638        assert_eq!(chunks.len(), 2);
17639        assert_eq!(chunks[0].column_names(), vec!["names", "ints"]);
17640        assert_eq!(
17641            chunks[0].column("names").unwrap().values(),
17642            &[
17643                Scalar::Utf8("alice".to_owned()),
17644                Scalar::Utf8("bob".to_owned())
17645            ]
17646        );
17647        assert_eq!(
17648            chunks[1].column("ints").unwrap().values(),
17649            &[Scalar::Int64(30)]
17650        );
17651    }
17652
17653    #[cfg(feature = "sql-sqlite")]
17654    #[test]
17655    fn sql_read_table_columns_chunks_rejects_zero_chunksize() {
17656        let frame = make_test_dataframe();
17657        let conn = make_sql_test_conn();
17658        write_sql(&frame, &conn, "proj_zero_chunk_tbl", SqlIfExists::Fail).expect("write");
17659
17660        let err = read_sql_table_columns_chunks(&conn, "proj_zero_chunk_tbl", &["names"], 0)
17661            .expect_err("zero projection chunksize should be rejected");
17662
17663        assert!(matches!(err, IoError::Sql(msg) if msg.contains("chunksize")));
17664    }
17665
17666    #[cfg(feature = "sql-sqlite")]
17667    #[test]
17668    fn sql_read_table_columns_chunks_rejects_invalid_projection_inputs() {
17669        let conn = make_sql_test_conn();
17670
17671        let empty = read_sql_table_columns_chunks(&conn, "proj_chunk_tbl", &[], 1)
17672            .expect_err("empty projection should be rejected");
17673        assert!(matches!(empty, IoError::Sql(msg) if msg.contains("columns must be non-empty")));
17674
17675        let invalid = read_sql_table_columns_chunks(&conn, "proj_chunk_tbl", &["bad column"], 1)
17676            .expect_err("invalid projection name should be rejected");
17677        assert!(matches!(invalid, IoError::Sql(msg) if msg.contains("invalid column name")));
17678    }
17679
17680    #[cfg(feature = "sql-sqlite")]
17681    #[test]
17682    fn sql_read_table_columns_with_index_col_promotes_projected_column() {
17683        let frame = make_test_dataframe();
17684        let conn = make_sql_test_conn();
17685        write_sql(&frame, &conn, "proj_index_tbl", SqlIfExists::Fail).expect("write");
17686
17687        let result = read_sql_table_columns_with_index_col(
17688            &conn,
17689            "proj_index_tbl",
17690            &["names", "ints"],
17691            Some("ints"),
17692        )
17693        .expect("projection with index_col");
17694
17695        assert_eq!(result.index().name(), Some("ints"));
17696        assert_eq!(
17697            result.index().labels(),
17698            &[
17699                IndexLabel::Int64(10),
17700                IndexLabel::Int64(20),
17701                IndexLabel::Int64(30)
17702            ]
17703        );
17704        assert_eq!(result.column_names(), vec!["names"]);
17705        assert_eq!(
17706            result.column("names").unwrap().values(),
17707            &[
17708                Scalar::Utf8("alice".to_owned()),
17709                Scalar::Utf8("bob".to_owned()),
17710                Scalar::Utf8("carol".to_owned())
17711            ]
17712        );
17713        assert!(result.column("ints").is_none());
17714    }
17715
17716    // br-frankenpandas-6n0uz: when index_col is set but NOT in columns,
17717    // pandas auto-projects it into the SELECT (then drops it from data
17718    // columns after promotion). Mirrors fd90.76's behavior on the
17719    // options-based reader.
17720    #[cfg(feature = "sql-sqlite")]
17721    #[test]
17722    fn sql_read_table_columns_with_index_col_auto_projects_when_absent() {
17723        let frame = make_test_dataframe();
17724        let conn = make_sql_test_conn();
17725        write_sql(&frame, &conn, "auto_proj_tbl", SqlIfExists::Fail).expect("write");
17726
17727        // index_col "ints" is NOT in columns — must be auto-projected, then
17728        // promoted to the index, then removed from the data columns.
17729        let result =
17730            read_sql_table_columns_with_index_col(&conn, "auto_proj_tbl", &["names"], Some("ints"))
17731                .expect("auto-project index_col");
17732
17733        assert_eq!(result.index().name(), Some("ints"));
17734        assert_eq!(
17735            result.index().labels(),
17736            &[
17737                IndexLabel::Int64(10),
17738                IndexLabel::Int64(20),
17739                IndexLabel::Int64(30)
17740            ]
17741        );
17742        assert_eq!(result.column_names(), vec!["names"]);
17743        assert!(result.column("ints").is_none());
17744        assert_eq!(
17745            result.column("names").unwrap().values(),
17746            &[
17747                Scalar::Utf8("alice".to_owned()),
17748                Scalar::Utf8("bob".to_owned()),
17749                Scalar::Utf8("carol".to_owned())
17750            ]
17751        );
17752    }
17753
17754    #[cfg(feature = "sql-sqlite")]
17755    #[test]
17756    fn sql_read_table_columns_chunks_with_index_col_auto_projects_when_absent() {
17757        let frame = make_test_dataframe();
17758        let conn = make_sql_test_conn();
17759        write_sql(&frame, &conn, "auto_proj_chunks_tbl", SqlIfExists::Fail).expect("write");
17760
17761        // index_col "ints" is NOT in columns — must be auto-projected per
17762        // chunk and dropped from each chunk's data columns.
17763        let chunks = read_sql_table_columns_chunks_with_index_col(
17764            &conn,
17765            "auto_proj_chunks_tbl",
17766            &["names"],
17767            Some("ints"),
17768            2,
17769        )
17770        .expect("auto-project chunks")
17771        .collect::<Result<Vec<_>, _>>()
17772        .expect("all chunks");
17773
17774        assert_eq!(chunks.len(), 2);
17775        assert_eq!(chunks[0].index().name(), Some("ints"));
17776        assert_eq!(
17777            chunks[0].index().labels(),
17778            &[IndexLabel::Int64(10), IndexLabel::Int64(20)]
17779        );
17780        assert_eq!(chunks[0].column_names(), vec!["names"]);
17781        assert!(chunks[0].column("ints").is_none());
17782        assert_eq!(
17783            chunks[0].column("names").unwrap().values(),
17784            &[
17785                Scalar::Utf8("alice".to_owned()),
17786                Scalar::Utf8("bob".to_owned())
17787            ]
17788        );
17789        assert_eq!(chunks[1].index().labels(), &[IndexLabel::Int64(30)]);
17790        assert_eq!(chunks[1].column_names(), vec!["names"]);
17791        assert!(chunks[1].column("ints").is_none());
17792    }
17793
17794    // br-frankenpandas-6n0uz: idempotency check — when index_col IS already
17795    // in columns, the auto-project helper must not duplicate it. Same final
17796    // result as the original explicit-include test, but proves the helper's
17797    // dedupe path.
17798    #[cfg(feature = "sql-sqlite")]
17799    #[test]
17800    fn sql_read_table_columns_with_index_col_no_duplication_when_listed() {
17801        let frame = make_test_dataframe();
17802        let conn = make_sql_test_conn();
17803        write_sql(&frame, &conn, "no_dup_tbl", SqlIfExists::Fail).expect("write");
17804
17805        // index_col "ints" IS in columns — must NOT be duplicated in SELECT.
17806        let result = read_sql_table_columns_with_index_col(
17807            &conn,
17808            "no_dup_tbl",
17809            &["names", "ints"],
17810            Some("ints"),
17811        )
17812        .expect("explicit include + index_col");
17813
17814        assert_eq!(result.index().name(), Some("ints"));
17815        assert_eq!(result.column_names(), vec!["names"]);
17816        assert!(result.column("ints").is_none());
17817    }
17818
17819    #[cfg(feature = "sql-sqlite")]
17820    #[test]
17821    fn sql_read_table_columns_with_index_col_none_keeps_projection_and_range_index() {
17822        let frame = make_test_dataframe();
17823        let conn = make_sql_test_conn();
17824        write_sql(&frame, &conn, "proj_no_index_tbl", SqlIfExists::Fail).expect("write");
17825
17826        let result = read_sql_table_columns_with_index_col(
17827            &conn,
17828            "proj_no_index_tbl",
17829            &["floats", "names"],
17830            None,
17831        )
17832        .expect("projection without index_col");
17833
17834        assert_eq!(
17835            result.index().labels(),
17836            &[
17837                IndexLabel::Int64(0),
17838                IndexLabel::Int64(1),
17839                IndexLabel::Int64(2)
17840            ]
17841        );
17842        assert_eq!(result.column_names(), vec!["floats", "names"]);
17843        assert_eq!(
17844            result.column("floats").unwrap().values()[1],
17845            Scalar::Float64(2.5)
17846        );
17847    }
17848
17849    #[cfg(feature = "sql-sqlite")]
17850    #[test]
17851    fn sql_read_table_columns_chunks_with_index_col_promotes_each_chunk_index() {
17852        let frame = make_test_dataframe();
17853        let conn = make_sql_test_conn();
17854        write_sql(&frame, &conn, "proj_index_chunk_tbl", SqlIfExists::Fail).expect("write");
17855
17856        let chunks = read_sql_table_columns_chunks_with_index_col(
17857            &conn,
17858            "proj_index_chunk_tbl",
17859            &["ints", "names"],
17860            Some("ints"),
17861            2,
17862        )
17863        .expect("indexed projection chunk iterator")
17864        .collect::<Result<Vec<_>, _>>()
17865        .expect("all chunks");
17866
17867        assert_eq!(chunks.len(), 2);
17868        assert_eq!(chunks[0].index().name(), Some("ints"));
17869        assert_eq!(
17870            chunks[0].index().labels(),
17871            &[IndexLabel::Int64(10), IndexLabel::Int64(20)]
17872        );
17873        assert_eq!(chunks[0].column_names(), vec!["names"]);
17874        assert_eq!(
17875            chunks[0].column("names").unwrap().values(),
17876            &[
17877                Scalar::Utf8("alice".to_owned()),
17878                Scalar::Utf8("bob".to_owned())
17879            ]
17880        );
17881        assert_eq!(chunks[1].index().labels(), &[IndexLabel::Int64(30)]);
17882        assert_eq!(
17883            chunks[1].column("names").unwrap().values(),
17884            &[Scalar::Utf8("carol".to_owned())]
17885        );
17886    }
17887
17888    #[cfg(feature = "sql-sqlite")]
17889    #[test]
17890    fn sql_read_table_columns_chunks_with_index_col_validates_projection_and_index() {
17891        let frame = make_test_dataframe();
17892        let conn = make_sql_test_conn();
17893        write_sql(&frame, &conn, "proj_index_error_tbl", SqlIfExists::Fail).expect("write");
17894
17895        let empty = read_sql_table_columns_chunks_with_index_col(
17896            &conn,
17897            "proj_index_error_tbl",
17898            &[],
17899            Some("ints"),
17900            1,
17901        )
17902        .expect_err("empty projection should be rejected");
17903        assert!(matches!(empty, IoError::Sql(msg) if msg.contains("columns must be non-empty")));
17904
17905        let invalid = read_sql_table_columns_with_index_col(
17906            &conn,
17907            "proj_index_error_tbl",
17908            &["bad column"],
17909            Some("ints"),
17910        )
17911        .expect_err("invalid projection name should be rejected");
17912        assert!(matches!(invalid, IoError::Sql(msg) if msg.contains("invalid column name")));
17913    }
17914
17915    #[cfg(feature = "sql-sqlite")]
17916    #[test]
17917    fn sql_read_query_with_index_col_works_on_arbitrary_select() {
17918        let frame = make_test_dataframe();
17919        let conn = make_sql_test_conn();
17920        write_sql(&frame, &conn, "queried_tbl", SqlIfExists::Fail).expect("write");
17921        let result = read_sql_with_index_col(
17922            &conn,
17923            "SELECT names AS label, ints, floats FROM queried_tbl ORDER BY ints DESC",
17924            Some("label"),
17925        )
17926        .expect("read query with index");
17927        assert_eq!(result.index().name(), Some("label"));
17928        // Order respected by the SELECT (ints DESC) → index labels in
17929        // reversed name order.
17930        assert_eq!(
17931            result.index().labels()[0],
17932            crate::IndexLabel::Utf8("carol".into())
17933        );
17934        assert_eq!(
17935            result.index().labels()[2],
17936            crate::IndexLabel::Utf8("alice".into())
17937        );
17938    }
17939
17940    #[cfg(feature = "sql-sqlite")]
17941    #[test]
17942    fn sql_write_read_roundtrip() {
17943        let frame = make_test_dataframe();
17944        let conn = make_sql_test_conn();
17945
17946        write_sql(&frame, &conn, "test_table", SqlIfExists::Fail).expect("write sql");
17947
17948        let frame2 = read_sql_table(&conn, "test_table").expect("read sql");
17949        assert_eq!(frame2.index().len(), 3);
17950
17951        // Int values survive round-trip.
17952        let ints = frame2.column("ints").unwrap();
17953        assert_eq!(ints.values()[0], Scalar::Int64(10));
17954        assert_eq!(ints.values()[1], Scalar::Int64(20));
17955        assert_eq!(ints.values()[2], Scalar::Int64(30));
17956
17957        // Float values survive round-trip.
17958        let floats = frame2.column("floats").unwrap();
17959        assert_eq!(floats.values()[0], Scalar::Float64(1.5));
17960        assert_eq!(floats.values()[1], Scalar::Float64(2.5));
17961        assert_eq!(floats.values()[2], Scalar::Float64(3.5));
17962
17963        // String values survive round-trip.
17964        let names = frame2.column("names").unwrap();
17965        assert_eq!(names.values()[0], Scalar::Utf8("alice".into()));
17966        assert_eq!(names.values()[1], Scalar::Utf8("bob".into()));
17967        assert_eq!(names.values()[2], Scalar::Utf8("carol".into()));
17968    }
17969
    /// Mock SQL connection for the query-builder tests. Its `SqlConnection`
    /// impl emits `$N`-style parameter markers and records what is passed to
    /// `insert_rows` instead of talking to a database.
    #[derive(Default)]
    struct DollarMarkerSqlConn {
        // One entry per `insert_rows` call: the INSERT statement text received.
        insert_sql: std::cell::RefCell<Vec<String>>,
        // One entry per `insert_rows` call: a copy of the row batch received.
        inserted_rows: std::cell::RefCell<Vec<Vec<Vec<Scalar>>>>,
    }
17975
17976    impl super::SqlConnection for DollarMarkerSqlConn {
17977        fn query(
17978            &self,
17979            _query: &str,
17980            _params: &[Scalar],
17981        ) -> Result<super::SqlQueryResult, IoError> {
17982            Err(IoError::Sql("mock connection does not read".to_owned()))
17983        }
17984
17985        fn execute_batch(&self, _sql: &str) -> Result<(), IoError> {
17986            Ok(())
17987        }
17988
17989        fn table_exists(&self, _table_name: &str) -> Result<bool, IoError> {
17990            Ok(false)
17991        }
17992
17993        fn insert_rows(&self, insert_sql: &str, rows: &[Vec<Scalar>]) -> Result<(), IoError> {
17994            self.insert_sql.borrow_mut().push(insert_sql.to_owned());
17995            self.inserted_rows.borrow_mut().push(rows.to_vec());
17996            Ok(())
17997        }
17998
17999        fn dtype_sql(&self, dtype: DType) -> &'static str {
18000            match dtype {
18001                DType::Int64
18002                | DType::Int64Nullable
18003                | DType::Bool
18004                | DType::BoolNullable
18005                | DType::Timedelta64
18006                | DType::Datetime64 => "BIGINT",
18007                DType::Float64 => "DOUBLE PRECISION",
18008                DType::Utf8
18009                | DType::Categorical
18010                | DType::Null
18011                | DType::Sparse
18012                | DType::Period
18013                | DType::Interval => "TEXT",
18014            }
18015        }
18016
18017        fn index_dtype_sql(&self, _index: &Index) -> &'static str {
18018            "TEXT"
18019        }
18020
18021        fn parameter_marker(&self, ordinal: usize) -> String {
18022            format!("${ordinal}")
18023        }
18024    }
18025
18026    #[test]
18027    fn sql_query_builders_quote_select_and_projection_identifiers() {
18028        let conn = DollarMarkerSqlConn::default();
18029        assert_eq!(
18030            super::sql_select_all_query(&conn, "portable_tbl").expect("select all query"),
18031            "SELECT * FROM \"portable_tbl\""
18032        );
18033        assert_eq!(
18034            super::sql_select_columns_query(&conn, "portable_tbl", &["names", "ints"])
18035                .expect("projection query"),
18036            "SELECT \"names\", \"ints\" FROM \"portable_tbl\""
18037        );
18038
18039        let err = super::sql_select_columns_query(&conn, "portable_tbl", &["bad column"])
18040            .expect_err("projection identifiers stay validated");
18041        assert!(matches!(err, IoError::Sql(msg) if msg.contains("invalid column name")));
18042    }
18043
18044    #[test]
18045    fn sql_query_builders_create_and_insert_use_backend_contracts() {
18046        let conn = DollarMarkerSqlConn::default();
18047        let column_defs = vec![
18048            super::sql_column_definition(&conn, "row id", "TEXT").expect("index column definition"),
18049            super::sql_column_definition(&conn, "value\"raw", "BIGINT")
18050                .expect("value column definition"),
18051        ];
18052
18053        assert_eq!(
18054            super::sql_create_table_query(&conn, "typed_tbl", &column_defs)
18055                .expect("create table query"),
18056            "CREATE TABLE IF NOT EXISTS \"typed_tbl\" (\"row id\" TEXT, \"value\"\"raw\" BIGINT)"
18057        );
18058
18059        let insert_columns = vec!["row id".to_owned(), "value\"raw".to_owned()];
18060        assert_eq!(
18061            super::sql_insert_rows_query(&conn, "typed_tbl", &insert_columns)
18062                .expect("insert row query"),
18063            "INSERT INTO \"typed_tbl\" (\"row id\", \"value\"\"raw\") VALUES ($1, $2)"
18064        );
18065    }
18066
18067    /// Verify that quote_identifier overrides on a custom backend ACTUALLY
18068    /// flow through the helper functions (br-frankenpandas-cx2x / fd90.12).
18069    /// A MySQL-style backend that returns backticks must produce backticked
18070    /// identifiers in CREATE / SELECT / INSERT statements without any
18071    /// further plumbing.
18072    #[test]
18073    fn sql_query_builders_use_backend_quote_identifier_override() {
18074        #[derive(Default)]
18075        struct BacktickSqlConn;
18076        impl super::SqlConnection for BacktickSqlConn {
18077            fn query(&self, _q: &str, _p: &[Scalar]) -> Result<super::SqlQueryResult, IoError> {
18078                Ok(super::SqlQueryResult {
18079                    columns: vec![],
18080                    rows: vec![],
18081                })
18082            }
18083            fn execute_batch(&self, _sql: &str) -> Result<(), IoError> {
18084                Ok(())
18085            }
18086            fn table_exists(&self, _name: &str) -> Result<bool, IoError> {
18087                Ok(false)
18088            }
18089            fn insert_rows(&self, _sql: &str, _rows: &[Vec<Scalar>]) -> Result<(), IoError> {
18090                Ok(())
18091            }
18092            fn dtype_sql(&self, _dtype: DType) -> &'static str {
18093                "TEXT"
18094            }
18095            fn index_dtype_sql(&self, _index: &Index) -> &'static str {
18096                "TEXT"
18097            }
18098            fn quote_identifier(&self, ident: &str) -> Result<String, IoError> {
18099                if ident.contains('\0') {
18100                    return Err(IoError::Sql("invalid SQL identifier: NUL byte".to_owned()));
18101                }
18102                // MySQL-style backtick quoting; embedded backticks doubled.
18103                Ok(format!("`{}`", ident.replace('`', "``")))
18104            }
18105        }
18106
18107        let conn = BacktickSqlConn;
18108        // SELECT / projection helpers flow through quote_identifier.
18109        assert_eq!(
18110            super::sql_select_all_query(&conn, "users").expect("select all"),
18111            "SELECT * FROM `users`"
18112        );
18113        assert_eq!(
18114            super::sql_select_columns_query(&conn, "users", &["id", "name"]).expect("projection"),
18115            "SELECT `id`, `name` FROM `users`"
18116        );
18117        // CREATE / INSERT helpers flow through quote_identifier.
18118        let col_defs = vec![super::sql_column_definition(&conn, "id", "INTEGER").expect("col def")];
18119        assert_eq!(
18120            super::sql_create_table_query(&conn, "users", &col_defs).expect("create"),
18121            "CREATE TABLE IF NOT EXISTS `users` (`id` INTEGER)"
18122        );
18123        let insert_cols = vec!["id".to_owned(), "name".to_owned()];
18124        assert_eq!(
18125            super::sql_insert_rows_query(&conn, "users", &insert_cols).expect("insert"),
18126            "INSERT INTO `users` (`id`, `name`) VALUES (?, ?)"
18127        );
18128    }
18129
18130    #[test]
18131    fn sql_write_uses_backend_parameter_markers() {
18132        let frame = make_test_dataframe();
18133        let conn = DollarMarkerSqlConn::default();
18134
18135        write_sql(&frame, &conn, "portable_tbl", SqlIfExists::Fail)
18136            .expect("write through marker-aware mock backend");
18137
18138        let insert_sql = conn.insert_sql.borrow();
18139        assert_eq!(
18140            insert_sql.as_slice(),
18141            &["INSERT INTO \"portable_tbl\" (\"ints\", \"floats\", \"names\") VALUES ($1, $2, $3)"
18142                .to_owned()]
18143        );
18144        let inserted_rows = conn.inserted_rows.borrow();
18145        assert_eq!(inserted_rows[0].len(), frame.index().len());
18146        assert_eq!(inserted_rows[0][0][0], Scalar::Int64(10));
18147        assert_eq!(inserted_rows[0][2][2], Scalar::Utf8("carol".into()));
18148    }
18149
18150    #[cfg(feature = "sql-sqlite")]
18151    #[test]
18152    fn sql_write_with_options_includes_named_index_column() {
18153        let mut columns = BTreeMap::new();
18154        columns.insert(
18155            "vals".to_string(),
18156            Column::new(DType::Int64, vec![Scalar::Int64(10), Scalar::Int64(20)]).unwrap(),
18157        );
18158
18159        let frame = DataFrame::new_with_column_order(
18160            Index::new(vec![IndexLabel::Int64(101), IndexLabel::Int64(102)]).set_name("row_id"),
18161            columns,
18162            vec!["vals".to_string()],
18163        )
18164        .unwrap();
18165        let conn = make_sql_test_conn();
18166
18167        write_sql_with_options(
18168            &frame,
18169            &conn,
18170            "indexed_write_tbl",
18171            &SqlWriteOptions {
18172                if_exists: SqlIfExists::Fail,
18173                index: true,
18174                index_label: None,
18175                schema: None,
18176                dtype: None,
18177                method: SqlInsertMethod::Single,
18178                chunksize: None,
18179            },
18180        )
18181        .expect("write with named index");
18182
18183        let roundtrip = read_sql_table_with_index_col(&conn, "indexed_write_tbl", Some("row_id"))
18184            .expect("read with promoted index");
18185        assert_eq!(roundtrip.index().name(), Some("row_id"));
18186        assert_eq!(roundtrip.index().labels(), frame.index().labels());
18187        assert!(roundtrip.column("row_id").is_none());
18188        assert_eq!(
18189            roundtrip.column("vals").unwrap().values(),
18190            &[Scalar::Int64(10), Scalar::Int64(20)]
18191        );
18192    }
18193
18194    #[cfg(feature = "sql-sqlite")]
18195    #[test]
18196    fn sql_write_with_options_unnamed_index_defaults_to_index_column_name() {
18197        let frame = make_test_dataframe();
18198        let conn = make_sql_test_conn();
18199
18200        write_sql_with_options(
18201            &frame,
18202            &conn,
18203            "default_index_tbl",
18204            &SqlWriteOptions {
18205                if_exists: SqlIfExists::Fail,
18206                index: true,
18207                index_label: None,
18208                schema: None,
18209                dtype: None,
18210                method: SqlInsertMethod::Single,
18211                chunksize: None,
18212            },
18213        )
18214        .expect("write with unnamed index");
18215
18216        let raw = read_sql_table(&conn, "default_index_tbl").expect("read raw table");
18217        assert!(raw.column("index").is_some());
18218        assert_eq!(raw.column("index").unwrap().values()[0], Scalar::Int64(0));
18219        assert_eq!(raw.column("index").unwrap().values()[2], Scalar::Int64(2));
18220    }
18221
18222    #[cfg(feature = "sql-sqlite")]
18223    #[test]
18224    fn sql_write_with_options_index_label_overrides_name() {
18225        let mut columns = BTreeMap::new();
18226        columns.insert(
18227            "vals".to_string(),
18228            Column::new(DType::Int64, vec![Scalar::Int64(7), Scalar::Int64(8)]).unwrap(),
18229        );
18230
18231        let frame = DataFrame::new_with_column_order(
18232            Index::new(vec![IndexLabel::Int64(1), IndexLabel::Int64(2)]).set_name("row_id"),
18233            columns,
18234            vec!["vals".to_string()],
18235        )
18236        .unwrap();
18237        let conn = make_sql_test_conn();
18238
18239        write_sql_with_options(
18240            &frame,
18241            &conn,
18242            "override_index_tbl",
18243            &SqlWriteOptions {
18244                if_exists: SqlIfExists::Fail,
18245                index: true,
18246                index_label: Some("custom_id".to_string()),
18247                schema: None,
18248                dtype: None,
18249                method: SqlInsertMethod::Single,
18250                chunksize: None,
18251            },
18252        )
18253        .expect("write with custom index label");
18254
18255        let raw = read_sql_table(&conn, "override_index_tbl").expect("read raw table");
18256        assert!(raw.column("custom_id").is_some());
18257        assert!(raw.column("row_id").is_none());
18258        assert_eq!(
18259            raw.column("custom_id").unwrap().values()[0],
18260            Scalar::Int64(1)
18261        );
18262        assert_eq!(
18263            raw.column("custom_id").unwrap().values()[1],
18264            Scalar::Int64(2)
18265        );
18266    }
18267
18268    #[cfg(feature = "sql-sqlite")]
18269    #[test]
18270    fn sql_write_with_options_index_false_omits_index_column() {
18271        let mut columns = BTreeMap::new();
18272        columns.insert(
18273            "vals".to_string(),
18274            Column::new(DType::Int64, vec![Scalar::Int64(5), Scalar::Int64(6)]).unwrap(),
18275        );
18276
18277        let frame = DataFrame::new_with_column_order(
18278            Index::new(vec![IndexLabel::Int64(9), IndexLabel::Int64(10)]).set_name("row_id"),
18279            columns,
18280            vec!["vals".to_string()],
18281        )
18282        .unwrap();
18283        let conn = make_sql_test_conn();
18284
18285        write_sql_with_options(
18286            &frame,
18287            &conn,
18288            "no_index_write_tbl",
18289            &SqlWriteOptions {
18290                if_exists: SqlIfExists::Fail,
18291                index: false,
18292                index_label: Some("custom_id".to_string()),
18293                schema: None,
18294                dtype: None,
18295                method: SqlInsertMethod::Single,
18296                chunksize: None,
18297            },
18298        )
18299        .expect("write without index");
18300
18301        let raw = read_sql_table(&conn, "no_index_write_tbl").expect("read raw table");
18302        assert!(raw.column("row_id").is_none());
18303        assert!(raw.column("custom_id").is_none());
18304        let names: Vec<&str> = raw
18305            .column_names()
18306            .iter()
18307            .map(|name| name.as_str())
18308            .collect();
18309        assert_eq!(names, vec!["vals"]);
18310    }
18311
18312    #[cfg(feature = "sql-sqlite")]
18313    #[test]
18314    fn sql_read_with_query() {
18315        let frame = make_test_dataframe();
18316        let conn = make_sql_test_conn();
18317        write_sql(&frame, &conn, "data", SqlIfExists::Fail).unwrap();
18318
18319        let filtered = read_sql(&conn, "SELECT ints, names FROM data WHERE ints > 15").unwrap();
18320        assert_eq!(filtered.index().len(), 2); // rows with ints=20,30
18321        assert_eq!(
18322            filtered.column("ints").unwrap().values()[0],
18323            Scalar::Int64(20)
18324        );
18325        assert_eq!(
18326            filtered.column("names").unwrap().values()[1],
18327            Scalar::Utf8("carol".into())
18328        );
18329    }
18330
18331    #[cfg(feature = "sql-sqlite")]
18332    #[test]
18333    fn sql_read_query_alias_matches_read_sql_query_path() {
18334        let frame = make_test_dataframe();
18335        let conn = make_sql_test_conn();
18336        write_sql(&frame, &conn, "data", SqlIfExists::Fail).unwrap();
18337
18338        let queried = read_sql_query(
18339            &conn,
18340            "SELECT names, ints FROM data WHERE ints >= 20 ORDER BY ints",
18341        )
18342        .unwrap();
18343
18344        assert_eq!(queried.column_names(), vec!["names", "ints"]);
18345        assert_eq!(queried.index().len(), 2);
18346        assert_eq!(
18347            queried.column("names").unwrap().values(),
18348            &[
18349                Scalar::Utf8("bob".to_owned()),
18350                Scalar::Utf8("carol".to_owned())
18351            ]
18352        );
18353        assert_eq!(
18354            queried.column("ints").unwrap().values(),
18355            &[Scalar::Int64(20), Scalar::Int64(30)]
18356        );
18357    }
18358
18359    #[cfg(feature = "sql-sqlite")]
18360    #[test]
18361    fn sql_read_query_with_options_applies_params_and_parse_dates() {
18362        let conn = make_sql_test_conn();
18363        conn.execute_batch(
18364            "CREATE TABLE events (ts TEXT, value INTEGER);
18365             INSERT INTO events (ts, value) VALUES
18366                ('2024-01-15', 1),
18367                ('2024-02-01 05:06:07', 2),
18368                ('2024-03-03', 3);",
18369        )
18370        .expect("create events table");
18371
18372        let frame = read_sql_query_with_options(
18373            &conn,
18374            "SELECT ts, value FROM events WHERE value > ? ORDER BY value",
18375            &SqlReadOptions {
18376                params: Some(vec![Scalar::Int64(1)]),
18377                parse_dates: Some(vec!["ts".to_owned()]),
18378                coerce_float: false,
18379                dtype: None,
18380                schema: None,
18381                columns: None,
18382                index_col: None,
18383            },
18384        )
18385        .expect("read_sql_query with options");
18386
18387        assert_eq!(frame.column_names(), vec!["ts", "value"]);
18388        assert_eq!(
18389            frame.column("ts").unwrap().values(),
18390            &[
18391                Scalar::Utf8("2024-02-01 05:06:07".to_owned()),
18392                Scalar::Utf8("2024-03-03 00:00:00".to_owned())
18393            ]
18394        );
18395        assert_eq!(
18396            frame.column("value").unwrap().values(),
18397            &[Scalar::Int64(2), Scalar::Int64(3)]
18398        );
18399    }
18400
18401    #[test]
18402    fn sql_read_query_with_options_and_index_col_uses_generic_connection() {
18403        use std::cell::RefCell;
18404
18405        struct RecordingSqlConn {
18406            seen_query: RefCell<Option<String>>,
18407            seen_params: RefCell<Vec<Scalar>>,
18408        }
18409
18410        impl super::SqlConnection for RecordingSqlConn {
18411            fn query(&self, query: &str, params: &[Scalar]) -> Result<SqlQueryResult, IoError> {
18412                *self.seen_query.borrow_mut() = Some(query.to_owned());
18413                *self.seen_params.borrow_mut() = params.to_vec();
18414                Ok(SqlQueryResult {
18415                    columns: vec![
18416                        "row_id".to_owned(),
18417                        "ts".to_owned(),
18418                        "amount".to_owned(),
18419                        "label".to_owned(),
18420                    ],
18421                    rows: vec![
18422                        vec![
18423                            Scalar::Int64(101),
18424                            Scalar::Utf8("2024-01-15".to_owned()),
18425                            Scalar::Utf8("$1.25".to_owned()),
18426                            Scalar::Utf8("alpha".to_owned()),
18427                        ],
18428                        vec![
18429                            Scalar::Int64(102),
18430                            Scalar::Utf8("2024-01-16".to_owned()),
18431                            Scalar::Utf8("2.50".to_owned()),
18432                            Scalar::Utf8("beta".to_owned()),
18433                        ],
18434                    ],
18435                })
18436            }
18437
18438            fn execute_batch(&self, _sql: &str) -> Result<(), IoError> {
18439                Ok(())
18440            }
18441
18442            fn table_exists(&self, _table_name: &str) -> Result<bool, IoError> {
18443                Ok(false)
18444            }
18445
18446            fn insert_rows(&self, _insert_sql: &str, _rows: &[Vec<Scalar>]) -> Result<(), IoError> {
18447                Ok(())
18448            }
18449
18450            fn dtype_sql(&self, _dtype: DType) -> &'static str {
18451                "TEXT"
18452            }
18453
18454            fn index_dtype_sql(&self, _index: &Index) -> &'static str {
18455                "BIGINT"
18456            }
18457        }
18458
18459        let conn = RecordingSqlConn {
18460            seen_query: RefCell::new(None),
18461            seen_params: RefCell::new(Vec::new()),
18462        };
18463        let query = "SELECT row_id, ts, amount, label FROM events WHERE amount > ?";
18464        let frame = super::read_sql_query_with_options_and_index_col(
18465            &conn,
18466            query,
18467            &SqlReadOptions {
18468                params: Some(vec![Scalar::Float64(1.0)]),
18469                parse_dates: Some(vec!["ts".to_owned()]),
18470                coerce_float: true,
18471                dtype: None,
18472                schema: None,
18473                columns: None,
18474                index_col: Some("amount".to_owned()),
18475            },
18476            Some("row_id"),
18477        )
18478        .expect("generic read_sql query with options and index_col");
18479
18480        assert_eq!(conn.seen_query.borrow().as_deref(), Some(query));
18481        assert_eq!(
18482            conn.seen_params.borrow().as_slice(),
18483            &[Scalar::Float64(1.0)]
18484        );
18485        assert_eq!(frame.index().name(), Some("row_id"));
18486        assert_eq!(
18487            frame.index().labels(),
18488            &[IndexLabel::Int64(101), IndexLabel::Int64(102)]
18489        );
18490        assert_eq!(frame.column_names(), vec!["ts", "amount", "label"]);
18491        assert_eq!(
18492            frame.column("ts").unwrap().values(),
18493            &[
18494                Scalar::Utf8("2024-01-15 00:00:00".to_owned()),
18495                Scalar::Utf8("2024-01-16 00:00:00".to_owned())
18496            ]
18497        );
18498        assert_eq!(
18499            frame.column("amount").unwrap().values(),
18500            &[Scalar::Float64(1.25), Scalar::Float64(2.5)]
18501        );
18502        assert_eq!(
18503            frame.column("label").unwrap().values(),
18504            &[
18505                Scalar::Utf8("alpha".to_owned()),
18506                Scalar::Utf8("beta".to_owned())
18507            ]
18508        );
18509    }
18510
18511    #[cfg(feature = "sql-sqlite")]
18512    #[test]
18513    fn sql_read_query_with_index_col_promotes_named_column() {
18514        let frame = make_test_dataframe();
18515        let conn = make_sql_test_conn();
18516        write_sql(&frame, &conn, "data", SqlIfExists::Fail).unwrap();
18517
18518        let indexed = read_sql_query_with_index_col(
18519            &conn,
18520            "SELECT names, ints FROM data ORDER BY ints",
18521            Some("names"),
18522        )
18523        .unwrap();
18524
18525        assert_eq!(
18526            indexed.index().labels(),
18527            &[
18528                IndexLabel::Utf8("alice".to_owned()),
18529                IndexLabel::Utf8("bob".to_owned()),
18530                IndexLabel::Utf8("carol".to_owned())
18531            ]
18532        );
18533        assert_eq!(indexed.index().name(), Some("names"));
18534        assert!(indexed.column("names").is_none());
18535        assert_eq!(
18536            indexed.column("ints").unwrap().values(),
18537            &[Scalar::Int64(10), Scalar::Int64(20), Scalar::Int64(30)]
18538        );
18539    }
18540
18541    #[cfg(feature = "sql-sqlite")]
18542    #[test]
18543    fn sql_read_query_chunks_alias_batches_rows() {
18544        let conn = make_sql_test_conn();
18545        conn.execute_batch(
18546            "CREATE TABLE query_chunked (id INTEGER, name TEXT);
18547             INSERT INTO query_chunked (id, name) VALUES
18548                (1, 'alpha'),
18549                (2, 'beta'),
18550                (3, 'gamma');",
18551        )
18552        .expect("create query_chunked table");
18553
18554        let chunks =
18555            read_sql_query_chunks(&conn, "SELECT id, name FROM query_chunked ORDER BY id", 2)
18556                .expect("query chunk iterator")
18557                .collect::<Result<Vec<_>, _>>()
18558                .expect("all chunks");
18559
18560        assert_eq!(chunks.len(), 2);
18561        assert_eq!(chunks[0].column_names(), vec!["id", "name"]);
18562        assert_eq!(
18563            chunks[0].column("id").unwrap().values(),
18564            &[Scalar::Int64(1), Scalar::Int64(2)]
18565        );
18566        assert_eq!(
18567            chunks[1].column("name").unwrap().values(),
18568            &[Scalar::Utf8("gamma".to_owned())]
18569        );
18570    }
18571
18572    #[cfg(feature = "sql-sqlite")]
18573    #[test]
18574    fn sql_read_query_chunks_with_options_applies_params_parse_dates_and_coerce_float() {
18575        let conn = make_sql_test_conn();
18576        conn.execute_batch(
18577            "CREATE TABLE query_events (ts TEXT, amount TEXT, keep INTEGER);
18578             INSERT INTO query_events (ts, amount, keep) VALUES
18579                ('2024-01-15', '12.50', 0),
18580                ('2024-02-01 05:06:07', '$1,234.50', 1),
18581                ('2024-03-03', '-3.25', 1);",
18582        )
18583        .expect("create query_events table");
18584
18585        let chunks = read_sql_query_chunks_with_options(
18586            &conn,
18587            "SELECT ts, amount FROM query_events WHERE keep = ? ORDER BY ts",
18588            &SqlReadOptions {
18589                params: Some(vec![Scalar::Int64(1)]),
18590                parse_dates: Some(vec!["ts".to_owned()]),
18591                coerce_float: true,
18592                dtype: None,
18593                schema: None,
18594                columns: None,
18595                index_col: None,
18596            },
18597            1,
18598        )
18599        .expect("query chunk iterator")
18600        .collect::<Result<Vec<_>, _>>()
18601        .expect("all chunks");
18602
18603        assert_eq!(chunks.len(), 2);
18604        assert_eq!(
18605            chunks[0].column("ts").unwrap().values(),
18606            &[Scalar::Utf8("2024-02-01 05:06:07".to_owned())]
18607        );
18608        assert_eq!(
18609            chunks[0].column("amount").unwrap().values(),
18610            &[Scalar::Float64(1234.5)]
18611        );
18612        assert_eq!(
18613            chunks[1].column("ts").unwrap().values(),
18614            &[Scalar::Utf8("2024-03-03 00:00:00".to_owned())]
18615        );
18616        assert_eq!(
18617            chunks[1].column("amount").unwrap().values(),
18618            &[Scalar::Float64(-3.25)]
18619        );
18620    }
18621
18622    #[test]
18623    fn sql_read_chunks_uses_paged_queries_when_backend_opts_in() {
18624        use std::cell::RefCell;
18625
18626        struct PagedChunksConn {
18627            queries: RefCell<Vec<(String, Vec<Scalar>)>>,
18628            rows: Vec<Vec<Scalar>>,
18629        }
18630
18631        impl PagedChunksConn {
18632            fn page_bounds(params: &[Scalar]) -> (usize, usize) {
18633                let [
18634                    Scalar::Int64(1),
18635                    Scalar::Int64(limit),
18636                    Scalar::Int64(offset),
18637                ] = params
18638                else {
18639                    assert_eq!(
18640                        params,
18641                        &[Scalar::Int64(1), Scalar::Int64(0), Scalar::Int64(0),],
18642                        "expected original param plus LIMIT/OFFSET params"
18643                    );
18644                    return (0, 0);
18645                };
18646                (
18647                    usize::try_from(*limit).expect("non-negative limit"),
18648                    usize::try_from(*offset).expect("non-negative offset"),
18649                )
18650            }
18651        }
18652
18653        impl super::SqlConnection for PagedChunksConn {
18654            fn query(&self, query: &str, params: &[Scalar]) -> Result<SqlQueryResult, IoError> {
18655                self.queries
18656                    .borrow_mut()
18657                    .push((query.to_owned(), params.to_vec()));
18658                assert!(
18659                    query.contains("frankenpandas_sql_chunk_source")
18660                        && query.contains("LIMIT ? OFFSET ?"),
18661                    "paged chunk path should wrap the caller query with LIMIT/OFFSET, got {query}"
18662                );
18663
18664                let (limit, offset) = Self::page_bounds(params);
18665                let rows = self.rows.iter().skip(offset).take(limit).cloned().collect();
18666                Ok(SqlQueryResult {
18667                    columns: vec!["id".to_owned(), "name".to_owned()],
18668                    rows,
18669                })
18670            }
18671
18672            fn supports_paged_sql_chunks(&self) -> bool {
18673                true
18674            }
18675
18676            fn execute_batch(&self, _sql: &str) -> Result<(), IoError> {
18677                Ok(())
18678            }
18679
18680            fn table_exists(&self, _table_name: &str) -> Result<bool, IoError> {
18681                Ok(false)
18682            }
18683
18684            fn insert_rows(&self, _insert_sql: &str, _rows: &[Vec<Scalar>]) -> Result<(), IoError> {
18685                Ok(())
18686            }
18687
18688            fn dtype_sql(&self, _dtype: DType) -> &'static str {
18689                "TEXT"
18690            }
18691
18692            fn index_dtype_sql(&self, _index: &Index) -> &'static str {
18693                "TEXT"
18694            }
18695        }
18696
18697        let conn = PagedChunksConn {
18698            queries: RefCell::new(Vec::new()),
18699            rows: vec![
18700                vec![Scalar::Int64(1), Scalar::Utf8("a".to_owned())],
18701                vec![Scalar::Int64(2), Scalar::Utf8("b".to_owned())],
18702                vec![Scalar::Int64(3), Scalar::Utf8("c".to_owned())],
18703                vec![Scalar::Int64(4), Scalar::Utf8("d".to_owned())],
18704                vec![Scalar::Int64(5), Scalar::Utf8("e".to_owned())],
18705            ],
18706        };
18707
18708        let chunks = read_sql_chunks_with_options(
18709            &conn,
18710            "SELECT id, name FROM paged_source WHERE keep = ? ORDER BY id;",
18711            &SqlReadOptions {
18712                params: Some(vec![Scalar::Int64(1)]),
18713                ..SqlReadOptions::default()
18714            },
18715            2,
18716        )
18717        .expect("paged chunk iterator")
18718        .collect::<Result<Vec<_>, _>>()
18719        .expect("all chunks");
18720
18721        assert_eq!(chunks.len(), 3);
18722        assert_eq!(
18723            chunks[0].column("id").unwrap().values(),
18724            &[Scalar::Int64(1), Scalar::Int64(2)]
18725        );
18726        assert_eq!(
18727            chunks[1].column("id").unwrap().values(),
18728            &[Scalar::Int64(3), Scalar::Int64(4)]
18729        );
18730        assert_eq!(
18731            chunks[2].column("name").unwrap().values(),
18732            &[Scalar::Utf8("e".to_owned())]
18733        );
18734
18735        let queries = conn.queries.borrow();
18736        let expected_params = vec![
18737            vec![Scalar::Int64(1), Scalar::Int64(0), Scalar::Int64(0)],
18738            vec![Scalar::Int64(1), Scalar::Int64(2), Scalar::Int64(0)],
18739            vec![Scalar::Int64(1), Scalar::Int64(2), Scalar::Int64(2)],
18740            vec![Scalar::Int64(1), Scalar::Int64(2), Scalar::Int64(4)],
18741        ];
18742        assert_eq!(
18743            queries
18744                .iter()
18745                .map(|(_, params)| params.clone())
18746                .collect::<Vec<_>>(),
18747            expected_params
18748        );
18749    }
18750
18751    #[cfg(feature = "sql-sqlite")]
18752    #[test]
18753    fn sql_read_query_chunks_with_options_and_index_col_applies_options_before_indexing() {
18754        let conn = make_sql_test_conn();
18755        conn.execute_batch(
18756            "CREATE TABLE query_index_events (ts TEXT, amount TEXT, keep INTEGER);
18757             INSERT INTO query_index_events (ts, amount, keep) VALUES
18758                ('2024-01-15', '12.50', 0),
18759                ('2024-02-01 05:06:07', '$1,234.50', 1),
18760                ('2024-03-03', '-3.25', 1);",
18761        )
18762        .expect("create query_index_events table");
18763
18764        let chunks = read_sql_query_chunks_with_options_and_index_col(
18765            &conn,
18766            "SELECT ts, amount FROM query_index_events WHERE keep = ? ORDER BY ts",
18767            &SqlReadOptions {
18768                params: Some(vec![Scalar::Int64(1)]),
18769                parse_dates: Some(vec!["ts".to_owned()]),
18770                coerce_float: true,
18771                dtype: None,
18772                schema: None,
18773                columns: None,
18774                index_col: None,
18775            },
18776            Some("ts"),
18777            1,
18778        )
18779        .expect("indexed query chunk iterator")
18780        .collect::<Result<Vec<_>, _>>()
18781        .expect("all chunks");
18782
18783        assert_eq!(chunks.len(), 2);
18784        assert_eq!(chunks[0].index().name(), Some("ts"));
18785        assert_eq!(
18786            chunks[0].index().labels(),
18787            &[IndexLabel::Utf8("2024-02-01 05:06:07".to_owned())]
18788        );
18789        assert!(chunks[0].column("ts").is_none());
18790        assert_eq!(
18791            chunks[0].column("amount").unwrap().values(),
18792            &[Scalar::Float64(1234.5)]
18793        );
18794        assert_eq!(
18795            chunks[1].index().labels(),
18796            &[IndexLabel::Utf8("2024-03-03 00:00:00".to_owned())]
18797        );
18798        assert_eq!(
18799            chunks[1].column("amount").unwrap().values(),
18800            &[Scalar::Float64(-3.25)]
18801        );
18802    }
18803
18804    #[cfg(feature = "sql-sqlite")]
18805    #[test]
18806    fn sql_read_chunks_with_options_and_index_col_none_keeps_options_and_range_index() {
18807        let conn = make_sql_test_conn();
18808        conn.execute_batch(
18809            "CREATE TABLE query_options_no_index (id INTEGER, amount TEXT);
18810             INSERT INTO query_options_no_index (id, amount) VALUES
18811                (1, '$10.50'),
18812                (2, '11.25');",
18813        )
18814        .expect("create query_options_no_index table");
18815
18816        let chunks = read_sql_chunks_with_options_and_index_col(
18817            &conn,
18818            "SELECT id, amount FROM query_options_no_index ORDER BY id",
18819            &SqlReadOptions {
18820                params: None,
18821                parse_dates: None,
18822                coerce_float: true,
18823                dtype: None,
18824                schema: None,
18825                columns: None,
18826                index_col: None,
18827            },
18828            None,
18829            1,
18830        )
18831        .expect("query chunk iterator")
18832        .collect::<Result<Vec<_>, _>>()
18833        .expect("all chunks");
18834
18835        assert_eq!(chunks.len(), 2);
18836        assert_eq!(chunks[0].index().labels(), &[IndexLabel::Int64(0)]);
18837        assert_eq!(chunks[1].index().labels(), &[IndexLabel::Int64(0)]);
18838        assert_eq!(
18839            chunks[0].column("amount").unwrap().values(),
18840            &[Scalar::Float64(10.5)]
18841        );
18842        assert_eq!(
18843            chunks[1].column("amount").unwrap().values(),
18844            &[Scalar::Float64(11.25)]
18845        );
18846    }
18847
18848    #[cfg(feature = "sql-sqlite")]
18849    #[test]
18850    fn sql_read_chunks_with_options_and_index_col_uses_options_index_when_explicit_none() {
18851        let conn = make_sql_test_conn();
18852        conn.execute_batch(
18853            "CREATE TABLE query_options_struct_index (id INTEGER, amount TEXT);
18854             INSERT INTO query_options_struct_index (id, amount) VALUES
18855                (10, '$10.50'),
18856                (20, '11.25');",
18857        )
18858        .expect("create query_options_struct_index table");
18859
18860        let chunks = read_sql_chunks_with_options_and_index_col(
18861            &conn,
18862            "SELECT id, amount FROM query_options_struct_index ORDER BY id",
18863            &SqlReadOptions {
18864                params: None,
18865                parse_dates: None,
18866                coerce_float: true,
18867                dtype: None,
18868                schema: None,
18869                columns: None,
18870                index_col: Some("id".to_owned()),
18871            },
18872            None,
18873            1,
18874        )
18875        .expect("query chunk iterator")
18876        .collect::<Result<Vec<_>, _>>()
18877        .expect("all chunks");
18878
18879        assert_eq!(chunks.len(), 2);
18880        assert_eq!(chunks[0].index().name(), Some("id"));
18881        assert_eq!(chunks[0].index().labels(), &[IndexLabel::Int64(10)]);
18882        assert!(chunks[0].column("id").is_none());
18883        assert_eq!(chunks[1].index().labels(), &[IndexLabel::Int64(20)]);
18884        assert_eq!(
18885            chunks[1].column("amount").unwrap().values(),
18886            &[Scalar::Float64(11.25)]
18887        );
18888    }
18889
18890    #[cfg(feature = "sql-sqlite")]
18891    #[test]
18892    fn sql_read_query_chunks_with_options_and_index_col_missing_column_errors() {
18893        let conn = make_sql_test_conn();
18894        conn.execute_batch(
18895            "CREATE TABLE query_options_missing_index (id INTEGER, amount TEXT);
18896             INSERT INTO query_options_missing_index (id, amount) VALUES (1, '10.5');",
18897        )
18898        .expect("create query_options_missing_index table");
18899
18900        let err = read_sql_query_chunks_with_options_and_index_col(
18901            &conn,
18902            "SELECT id, amount FROM query_options_missing_index",
18903            &SqlReadOptions {
18904                params: None,
18905                parse_dates: None,
18906                coerce_float: true,
18907                dtype: None,
18908                schema: None,
18909                columns: None,
18910                index_col: None,
18911            },
18912            Some("missing"),
18913            1,
18914        )
18915        .expect_err("missing index_col should error during iterator construction");
18916
18917        assert!(matches!(err, IoError::Sql(msg) if msg.contains("index_col")));
18918    }
18919
18920    #[cfg(feature = "sql-sqlite")]
18921    #[test]
18922    fn sql_read_query_with_options_and_index_col_applies_options_before_indexing() {
18923        let conn = make_sql_test_conn();
18924        conn.execute_batch(
18925            "CREATE TABLE query_frame_index_events (ts TEXT, amount TEXT, keep INTEGER);
18926             INSERT INTO query_frame_index_events (ts, amount, keep) VALUES
18927                ('2024-01-15', '12.50', 0),
18928                ('2024-02-01 05:06:07', '$1,234.50', 1),
18929                ('2024-03-03', '-3.25', 1);",
18930        )
18931        .expect("create query_frame_index_events table");
18932
18933        let frame = read_sql_query_with_options_and_index_col(
18934            &conn,
18935            "SELECT ts, amount FROM query_frame_index_events WHERE keep = ? ORDER BY ts",
18936            &SqlReadOptions {
18937                params: Some(vec![Scalar::Int64(1)]),
18938                parse_dates: Some(vec!["ts".to_owned()]),
18939                coerce_float: true,
18940                dtype: None,
18941                schema: None,
18942                columns: None,
18943                index_col: None,
18944            },
18945            Some("ts"),
18946        )
18947        .expect("read indexed query frame");
18948
18949        assert_eq!(frame.index().name(), Some("ts"));
18950        assert_eq!(
18951            frame.index().labels(),
18952            &[
18953                IndexLabel::Utf8("2024-02-01 05:06:07".to_owned()),
18954                IndexLabel::Utf8("2024-03-03 00:00:00".to_owned())
18955            ]
18956        );
18957        assert!(frame.column("ts").is_none());
18958        assert_eq!(
18959            frame.column("amount").unwrap().values(),
18960            &[Scalar::Float64(1234.5), Scalar::Float64(-3.25)]
18961        );
18962    }
18963
18964    #[cfg(feature = "sql-sqlite")]
18965    #[test]
18966    fn sql_read_query_with_options_and_index_col_explicit_arg_wins() {
18967        let conn = make_sql_test_conn();
18968        conn.execute_batch(
18969            "CREATE TABLE query_frame_index_override (a INTEGER, b INTEGER, val TEXT);
18970             INSERT INTO query_frame_index_override (a, b, val) VALUES
18971                (1, 100, 'x'),
18972                (2, 200, 'y');",
18973        )
18974        .expect("create query_frame_index_override table");
18975
18976        let frame = read_sql_query_with_options_and_index_col(
18977            &conn,
18978            "SELECT a, b, val FROM query_frame_index_override ORDER BY a",
18979            &SqlReadOptions {
18980                index_col: Some("a".to_owned()),
18981                ..SqlReadOptions::default()
18982            },
18983            Some("b"),
18984        )
18985        .expect("read indexed query frame with override");
18986
18987        assert_eq!(frame.column_names(), vec!["a", "val"]);
18988        assert_eq!(
18989            frame.index().labels(),
18990            &[IndexLabel::Int64(100), IndexLabel::Int64(200)]
18991        );
18992    }
18993
18994    #[cfg(feature = "sql-sqlite")]
18995    #[test]
18996    fn sql_read_query_chunks_rejects_zero_chunksize() {
18997        let conn = make_sql_test_conn();
18998
18999        let err = read_sql_query_chunks(&conn, "SELECT 1", 0)
19000            .expect_err("zero query chunksize should be rejected");
19001
19002        assert!(matches!(err, IoError::Sql(msg) if msg.contains("chunksize")));
19003    }
19004
19005    #[cfg(feature = "sql-sqlite")]
19006    #[test]
19007    fn sql_read_query_chunks_with_index_col_promotes_each_chunk_index() {
19008        let conn = make_sql_test_conn();
19009        conn.execute_batch(
19010            "CREATE TABLE query_index_chunked (id INTEGER, label TEXT, value INTEGER);
19011             INSERT INTO query_index_chunked (id, label, value) VALUES
19012                (1, 'alpha', 10),
19013                (2, 'beta', 20),
19014                (3, 'gamma', 30);",
19015        )
19016        .expect("create query_index_chunked table");
19017
19018        let chunks = read_sql_query_chunks_with_index_col(
19019            &conn,
19020            "SELECT id, label, value FROM query_index_chunked ORDER BY id",
19021            Some("label"),
19022            2,
19023        )
19024        .expect("query indexed chunk iterator")
19025        .collect::<Result<Vec<_>, _>>()
19026        .expect("all chunks");
19027
19028        assert_eq!(chunks.len(), 2);
19029        assert_eq!(chunks[0].index().name(), Some("label"));
19030        assert_eq!(
19031            chunks[0].index().labels(),
19032            &[
19033                IndexLabel::Utf8("alpha".to_owned()),
19034                IndexLabel::Utf8("beta".to_owned())
19035            ]
19036        );
19037        assert!(chunks[0].column("label").is_none());
19038        assert_eq!(
19039            chunks[1].index().labels(),
19040            &[IndexLabel::Utf8("gamma".to_owned())]
19041        );
19042        assert_eq!(
19043            chunks[1].column("value").unwrap().values(),
19044            &[Scalar::Int64(30)]
19045        );
19046    }
19047
19048    #[cfg(feature = "sql-sqlite")]
19049    #[test]
19050    fn sql_read_chunks_with_index_col_none_keeps_fresh_chunk_range_indexes() {
19051        let conn = make_sql_test_conn();
19052        conn.execute_batch(
19053            "CREATE TABLE query_no_index_chunked (id INTEGER, label TEXT);
19054             INSERT INTO query_no_index_chunked (id, label) VALUES
19055                (1, 'alpha'),
19056                (2, 'beta');",
19057        )
19058        .expect("create query_no_index_chunked table");
19059
19060        let chunks = read_sql_chunks_with_index_col(
19061            &conn,
19062            "SELECT id, label FROM query_no_index_chunked ORDER BY id",
19063            None,
19064            1,
19065        )
19066        .expect("query chunk iterator")
19067        .collect::<Result<Vec<_>, _>>()
19068        .expect("all chunks");
19069
19070        assert_eq!(chunks.len(), 2);
19071        assert_eq!(chunks[0].index().labels(), &[IndexLabel::Int64(0)]);
19072        assert_eq!(chunks[1].index().labels(), &[IndexLabel::Int64(0)]);
19073        assert_eq!(chunks[1].column_names(), vec!["id", "label"]);
19074    }
19075
19076    #[cfg(feature = "sql-sqlite")]
19077    #[test]
19078    fn sql_read_query_chunks_with_index_col_missing_column_errors() {
19079        let conn = make_sql_test_conn();
19080        conn.execute_batch(
19081            "CREATE TABLE query_missing_index_chunked (id INTEGER, value INTEGER);
19082             INSERT INTO query_missing_index_chunked (id, value) VALUES (1, 10);",
19083        )
19084        .expect("create query_missing_index_chunked table");
19085
19086        let err = read_sql_query_chunks_with_index_col(
19087            &conn,
19088            "SELECT id, value FROM query_missing_index_chunked",
19089            Some("missing"),
19090            1,
19091        )
19092        .expect_err("missing index_col should error during iterator construction");
19093
19094        assert!(matches!(err, IoError::Sql(msg) if msg.contains("index_col")));
19095    }
19096
19097    #[cfg(feature = "sql-sqlite")]
19098    #[test]
19099    fn sql_read_table_chunks_batches_rows() {
19100        let conn = make_sql_test_conn();
19101        conn.execute_batch(
19102            "CREATE TABLE table_chunked (id INTEGER, name TEXT);
19103             INSERT INTO table_chunked (id, name) VALUES
19104                (1, 'alpha'),
19105                (2, 'beta'),
19106                (3, 'gamma'),
19107                (4, 'delta');",
19108        )
19109        .expect("create table_chunked table");
19110
19111        let chunks = read_sql_table_chunks(&conn, "table_chunked", 3)
19112            .expect("table chunk iterator")
19113            .collect::<Result<Vec<_>, _>>()
19114            .expect("all chunks");
19115
19116        assert_eq!(chunks.len(), 2);
19117        assert_eq!(chunks[0].column_names(), vec!["id", "name"]);
19118        assert_eq!(
19119            chunks[0].column("id").unwrap().values(),
19120            &[Scalar::Int64(1), Scalar::Int64(2), Scalar::Int64(3)]
19121        );
19122        assert_eq!(
19123            chunks[1].column("name").unwrap().values(),
19124            &[Scalar::Utf8("delta".to_owned())]
19125        );
19126    }
19127
19128    #[cfg(feature = "sql-sqlite")]
19129    #[test]
19130    fn sql_read_table_chunks_rejects_zero_chunksize() {
19131        let conn = make_sql_test_conn();
19132        conn.execute_batch("CREATE TABLE zero_chunked (id INTEGER);")
19133            .expect("create zero_chunked table");
19134
19135        let err = read_sql_table_chunks(&conn, "zero_chunked", 0)
19136            .expect_err("zero table chunksize should be rejected");
19137
19138        assert!(matches!(err, IoError::Sql(msg) if msg.contains("chunksize")));
19139    }
19140
19141    #[cfg(feature = "sql-sqlite")]
19142    #[test]
19143    fn sql_read_table_chunks_rejects_invalid_table_name() {
19144        let conn = make_sql_test_conn();
19145
19146        let err = read_sql_table_chunks(&conn, "bad table", 1)
19147            .expect_err("invalid table name should be rejected");
19148
19149        assert!(matches!(err, IoError::Sql(msg) if msg.contains("invalid table name")));
19150    }
19151
19152    #[cfg(feature = "sql-sqlite")]
19153    #[test]
19154    fn sql_read_table_chunks_with_index_col_promotes_each_chunk_index() {
19155        let conn = make_sql_test_conn();
19156        conn.execute_batch(
19157            "CREATE TABLE table_index_chunked (id INTEGER, name TEXT, score INTEGER);
19158             INSERT INTO table_index_chunked (id, name, score) VALUES
19159                (10, 'alpha', 100),
19160                (20, 'beta', 200),
19161                (30, 'gamma', 300);",
19162        )
19163        .expect("create table_index_chunked table");
19164
19165        let chunks =
19166            read_sql_table_chunks_with_index_col(&conn, "table_index_chunked", Some("id"), 2)
19167                .expect("table indexed chunk iterator")
19168                .collect::<Result<Vec<_>, _>>()
19169                .expect("all chunks");
19170
19171        assert_eq!(chunks.len(), 2);
19172        assert_eq!(chunks[0].index().name(), Some("id"));
19173        assert_eq!(
19174            chunks[0].index().labels(),
19175            &[IndexLabel::Int64(10), IndexLabel::Int64(20)]
19176        );
19177        assert!(chunks[0].column("id").is_none());
19178        assert_eq!(
19179            chunks[1].column("score").unwrap().values(),
19180            &[Scalar::Int64(300)]
19181        );
19182    }
19183
19184    #[cfg(feature = "sql-sqlite")]
19185    #[test]
19186    fn sql_read_table_with_options_applies_parse_dates_and_coerce_float() {
19187        let conn = make_sql_test_conn();
19188        conn.execute_batch(
19189            "CREATE TABLE table_options (ts TEXT, amount TEXT, label TEXT);
19190             INSERT INTO table_options (ts, amount, label) VALUES
19191                ('2024-01-15', '$12.50', 'a'),
19192                ('2024-02-01 05:06:07', '1,234.50', 'b');",
19193        )
19194        .expect("create table_options table");
19195
19196        let frame = read_sql_table_with_options(
19197            &conn,
19198            "table_options",
19199            &SqlReadOptions {
19200                params: None,
19201                parse_dates: Some(vec!["ts".to_owned()]),
19202                coerce_float: true,
19203                dtype: None,
19204                schema: None,
19205                columns: None,
19206                index_col: None,
19207            },
19208        )
19209        .expect("read table with options");
19210
19211        assert_eq!(
19212            frame.column("ts").unwrap().values(),
19213            &[
19214                Scalar::Utf8("2024-01-15 00:00:00".to_owned()),
19215                Scalar::Utf8("2024-02-01 05:06:07".to_owned())
19216            ]
19217        );
19218        assert_eq!(
19219            frame.column("amount").unwrap().values(),
19220            &[Scalar::Float64(12.5), Scalar::Float64(1234.5)]
19221        );
19222        assert_eq!(
19223            frame.column("label").unwrap().values(),
19224            &[Scalar::Utf8("a".to_owned()), Scalar::Utf8("b".to_owned())]
19225        );
19226    }
19227
19228    #[cfg(feature = "sql-sqlite")]
19229    #[test]
19230    fn sql_read_table_chunks_with_options_applies_options_before_chunking() {
19231        let conn = make_sql_test_conn();
19232        conn.execute_batch(
19233            "CREATE TABLE table_options_chunked (ts TEXT, amount TEXT);
19234             INSERT INTO table_options_chunked (ts, amount) VALUES
19235                ('2024-03-01', '$10.00'),
19236                ('2024-03-02', '$20.50'),
19237                ('2024-03-03', '-3.25');",
19238        )
19239        .expect("create table_options_chunked table");
19240
19241        let chunks = read_sql_table_chunks_with_options(
19242            &conn,
19243            "table_options_chunked",
19244            &SqlReadOptions {
19245                params: None,
19246                parse_dates: Some(vec!["ts".to_owned()]),
19247                coerce_float: true,
19248                dtype: None,
19249                schema: None,
19250                columns: None,
19251                index_col: None,
19252            },
19253            2,
19254        )
19255        .expect("table option chunk iterator")
19256        .collect::<Result<Vec<_>, _>>()
19257        .expect("all chunks");
19258
19259        assert_eq!(chunks.len(), 2);
19260        assert_eq!(
19261            chunks[0].column("ts").unwrap().values(),
19262            &[
19263                Scalar::Utf8("2024-03-01 00:00:00".to_owned()),
19264                Scalar::Utf8("2024-03-02 00:00:00".to_owned())
19265            ]
19266        );
19267        assert_eq!(
19268            chunks[0].column("amount").unwrap().values(),
19269            &[Scalar::Float64(10.0), Scalar::Float64(20.5)]
19270        );
19271        assert_eq!(
19272            chunks[1].column("amount").unwrap().values(),
19273            &[Scalar::Float64(-3.25)]
19274        );
19275    }
19276
19277    #[cfg(feature = "sql-sqlite")]
19278    #[test]
19279    fn sql_read_table_chunks_with_options_validates_chunksize_and_table_name() {
19280        let conn = make_sql_test_conn();
19281        conn.execute_batch(
19282            "CREATE TABLE table_options_errors (ts TEXT);
19283             INSERT INTO table_options_errors (ts) VALUES ('2024-01-01');",
19284        )
19285        .expect("create table_options_errors table");
19286
19287        let zero = read_sql_table_chunks_with_options(
19288            &conn,
19289            "table_options_errors",
19290            &SqlReadOptions::default(),
19291            0,
19292        )
19293        .expect_err("zero chunksize should be rejected");
19294        assert!(matches!(zero, IoError::Sql(msg) if msg.contains("chunksize")));
19295
19296        let invalid = read_sql_table_with_options(
19297            &conn,
19298            "bad table",
19299            &SqlReadOptions {
19300                parse_dates: Some(vec!["ts".to_owned()]),
19301                ..SqlReadOptions::default()
19302            },
19303        )
19304        .expect_err("invalid table name should be rejected");
19305        assert!(matches!(invalid, IoError::Sql(msg) if msg.contains("invalid table name")));
19306    }
19307
19308    // br-frankenpandas-i8kja: read_sql_table_chunks_with_options previously
19309    // accepted SqlReadOptions { index_col: Some(...) } and silently dropped
19310    // the index_col while the full-frame sibling honored it. Surface the
19311    // mismatch with a typed error so callers route to the
19312    // `_and_index_col` variant. The plain entrypoint stays
19313    // `Result<SqlChunkIterator, _>` for backwards compatibility — the
19314    // indexed surface is what carries promotion logic.
19315    #[cfg(feature = "sql-sqlite")]
19316    #[test]
19317    fn read_sql_table_chunks_with_options_rejects_options_index_col() {
19318        let conn = make_sql_test_conn();
19319        super::SqlConnection::execute_batch(
19320            &conn,
19321            "CREATE TABLE i8kja_table_chunks_reject (id INTEGER, val TEXT);",
19322        )
19323        .unwrap();
19324        super::SqlConnection::execute_batch(
19325            &conn,
19326            "INSERT INTO i8kja_table_chunks_reject VALUES (1, 'a'), (2, 'b');",
19327        )
19328        .unwrap();
19329
19330        let err = read_sql_table_chunks_with_options(
19331            &conn,
19332            "i8kja_table_chunks_reject",
19333            &SqlReadOptions {
19334                index_col: Some("id".to_owned()),
19335                ..Default::default()
19336            },
19337            2,
19338        )
19339        .expect_err("options.index_col on non-indexed entrypoint must be rejected");
19340        assert!(
19341            matches!(&err, IoError::Sql(msg) if msg.contains("index_col") && msg.contains("read_sql_table_chunks_with_options_and_index_col")),
19342            "expected typed error pointing to the _and_index_col variant, got: {err:?}"
19343        );
19344
19345        // Sanity: same options struct on the indexed sibling honors index_col.
19346        let chunks: Vec<DataFrame> = read_sql_table_chunks_with_options_and_index_col(
19347            &conn,
19348            "i8kja_table_chunks_reject",
19349            &SqlReadOptions {
19350                index_col: Some("id".to_owned()),
19351                ..Default::default()
19352            },
19353            None,
19354            2,
19355        )
19356        .expect("indexed sibling honors options.index_col")
19357        .collect::<Result<Vec<_>, _>>()
19358        .expect("all chunks");
19359        assert_eq!(chunks.len(), 1);
19360        assert!(chunks[0].column("id").is_none());
19361        assert_eq!(
19362            chunks[0].index().labels(),
19363            &[IndexLabel::Int64(1), IndexLabel::Int64(2)]
19364        );
19365        assert_eq!(
19366            chunks[0].column("val").unwrap().values(),
19367            &[Scalar::Utf8("a".into()), Scalar::Utf8("b".into())]
19368        );
19369    }
19370
19371    #[cfg(feature = "sql-sqlite")]
19372    #[test]
19373    fn read_sql_chunks_with_options_rejects_options_index_col() {
19374        let conn = make_sql_test_conn();
19375        super::SqlConnection::execute_batch(
19376            &conn,
19377            "CREATE TABLE i8kja_query_chunks_reject (id INTEGER, val TEXT);",
19378        )
19379        .unwrap();
19380        super::SqlConnection::execute_batch(
19381            &conn,
19382            "INSERT INTO i8kja_query_chunks_reject VALUES (1, 'a');",
19383        )
19384        .unwrap();
19385
19386        let err = read_sql_chunks_with_options(
19387            &conn,
19388            "SELECT * FROM i8kja_query_chunks_reject",
19389            &SqlReadOptions {
19390                index_col: Some("id".to_owned()),
19391                ..Default::default()
19392            },
19393            2,
19394        )
19395        .expect_err("options.index_col on non-indexed entrypoint must be rejected");
19396        assert!(
19397            matches!(&err, IoError::Sql(msg) if msg.contains("index_col") && msg.contains("read_sql_chunks_with_options_and_index_col")),
19398            "expected typed error pointing to the _and_index_col variant, got: {err:?}"
19399        );
19400
19401        let err = read_sql_query_chunks_with_options(
19402            &conn,
19403            "SELECT * FROM i8kja_query_chunks_reject",
19404            &SqlReadOptions {
19405                index_col: Some("id".to_owned()),
19406                ..Default::default()
19407            },
19408            2,
19409        )
19410        .expect_err("query delegator should propagate the rejection");
19411        assert!(
19412            matches!(&err, IoError::Sql(msg) if msg.contains("index_col") && msg.contains("read_sql_query_chunks_with_options_and_index_col")),
19413            "expected query-specific _and_index_col suggestion, got: {err:?}"
19414        );
19415    }
19416
19417    // br-frankenpandas-t1777: query readers can't apply options.columns
19418    // (caller writes the SELECT, projection is fixed). Silently ignoring
19419    // diverged from the table-reader sibling. All 7 query-reader entry
19420    // points (3 foundations + 4 delegators) must reject options.columns
19421    // with a typed error pointing to the appropriate table reader.
19422    #[cfg(feature = "sql-sqlite")]
19423    #[test]
19424    fn read_sql_with_options_rejects_options_columns_across_query_entrypoints() {
19425        let conn = make_sql_test_conn();
19426        super::SqlConnection::execute_batch(
19427            &conn,
19428            "CREATE TABLE t1777_query_cols_reject (id INTEGER, val TEXT);",
19429        )
19430        .unwrap();
19431        super::SqlConnection::execute_batch(
19432            &conn,
19433            "INSERT INTO t1777_query_cols_reject VALUES (1, 'a'), (2, 'b');",
19434        )
19435        .unwrap();
19436
19437        fn assert_columns_rejection(err: &IoError, expected_sibling: &str) {
19438            assert!(
19439                matches!(err, IoError::Sql(msg)
19440                    if msg.contains("options.columns") && msg.contains(expected_sibling)),
19441                "expected options.columns error pointing to `{expected_sibling}`, got: {err:?}"
19442            );
19443        }
19444
19445        let opts_with_cols = || SqlReadOptions {
19446            columns: Some(vec!["id".to_owned()]),
19447            ..Default::default()
19448        };
19449
19450        // 1. read_sql_with_options (foundation, full-frame)
19451        let err = read_sql_with_options(
19452            &conn,
19453            "SELECT id, val FROM t1777_query_cols_reject",
19454            &opts_with_cols(),
19455        )
19456        .expect_err("read_sql_with_options must reject options.columns");
19457        assert_columns_rejection(&err, "read_sql_table_with_options");
19458
19459        // 2. read_sql_chunks_with_options (foundation, chunked)
19460        let err = read_sql_chunks_with_options(
19461            &conn,
19462            "SELECT id, val FROM t1777_query_cols_reject",
19463            &opts_with_cols(),
19464            2,
19465        )
19466        .expect_err("read_sql_chunks_with_options must reject options.columns");
19467        assert_columns_rejection(&err, "read_sql_table_chunks_with_options");
19468
19469        // 3. read_sql_chunks_with_options_and_index_col (foundation, indexed chunked)
19470        let err = read_sql_chunks_with_options_and_index_col(
19471            &conn,
19472            "SELECT id, val FROM t1777_query_cols_reject",
19473            &opts_with_cols(),
19474            Some("id"),
19475            2,
19476        )
19477        .expect_err("indexed chunks must reject options.columns");
19478        assert_columns_rejection(&err, "read_sql_table_chunks_with_options_and_index_col");
19479
19480        // 4. read_sql_query_with_options (delegator → read_sql_with_options)
19481        let err = read_sql_query_with_options(
19482            &conn,
19483            "SELECT id, val FROM t1777_query_cols_reject",
19484            &opts_with_cols(),
19485        )
19486        .expect_err("read_sql_query_with_options must propagate the rejection");
19487        assert_columns_rejection(&err, "read_sql_table_with_options");
19488
19489        // 5. read_sql_query_with_options_and_index_col (delegator)
19490        let err = read_sql_query_with_options_and_index_col(
19491            &conn,
19492            "SELECT id, val FROM t1777_query_cols_reject",
19493            &opts_with_cols(),
19494            Some("id"),
19495        )
19496        .expect_err("indexed query reader must reject options.columns");
19497        assert_columns_rejection(&err, "read_sql_table_with_options");
19498
19499        // 6. read_sql_query_chunks_with_options (delegator → read_sql_chunks_with_options)
19500        let err = read_sql_query_chunks_with_options(
19501            &conn,
19502            "SELECT id, val FROM t1777_query_cols_reject",
19503            &opts_with_cols(),
19504            2,
19505        )
19506        .expect_err("query chunks delegator must reject options.columns");
19507        assert_columns_rejection(&err, "read_sql_table_chunks_with_options");
19508
19509        // 7. read_sql_query_chunks_with_options_and_index_col (delegator)
19510        let err = read_sql_query_chunks_with_options_and_index_col(
19511            &conn,
19512            "SELECT id, val FROM t1777_query_cols_reject",
19513            &opts_with_cols(),
19514            Some("id"),
19515            2,
19516        )
19517        .expect_err("indexed query chunks delegator must reject options.columns");
19518        assert_columns_rejection(&err, "read_sql_table_chunks_with_options_and_index_col");
19519    }
19520
19521    // br-frankenpandas-t1777: table readers must continue to honor
19522    // options.columns (proves the cleared-options pass-through to the
19523    // query foundation didn't accidentally break the columns projection).
19524    #[cfg(feature = "sql-sqlite")]
19525    #[test]
19526    fn read_sql_table_with_options_still_honors_options_columns_after_t1777() {
19527        let conn = make_sql_test_conn();
19528        super::SqlConnection::execute_batch(
19529            &conn,
19530            "CREATE TABLE t1777_table_cols_honor (id INTEGER, val TEXT, secret TEXT);",
19531        )
19532        .unwrap();
19533        super::SqlConnection::execute_batch(
19534            &conn,
19535            "INSERT INTO t1777_table_cols_honor VALUES (1, 'a', 'x'), (2, 'b', 'y');",
19536        )
19537        .unwrap();
19538
19539        // Full-frame table reader with columns should project `id, val`
19540        // and drop `secret`.
19541        let frame = read_sql_table_with_options(
19542            &conn,
19543            "t1777_table_cols_honor",
19544            &SqlReadOptions {
19545                columns: Some(vec!["id".to_owned(), "val".to_owned()]),
19546                ..Default::default()
19547            },
19548        )
19549        .expect("table reader honors options.columns");
19550        assert_eq!(frame.column_names(), vec!["id", "val"]);
19551        assert!(frame.column("secret").is_none());
19552
19553        // Chunked table reader, same behavior per chunk.
19554        let chunks: Vec<DataFrame> = read_sql_table_chunks_with_options(
19555            &conn,
19556            "t1777_table_cols_honor",
19557            &SqlReadOptions {
19558                columns: Some(vec!["id".to_owned(), "val".to_owned()]),
19559                ..Default::default()
19560            },
19561            1,
19562        )
19563        .expect("chunked table reader honors options.columns")
19564        .collect::<Result<Vec<_>, _>>()
19565        .expect("all chunks");
19566        assert_eq!(chunks.len(), 2);
19567        for c in &chunks {
19568            assert_eq!(c.column_names(), vec!["id", "val"]);
19569            assert!(c.column("secret").is_none());
19570        }
19571
19572        // Indexed full-frame table reader: columns + index_col compose.
19573        let frame = read_sql_table_with_options_and_index_col(
19574            &conn,
19575            "t1777_table_cols_honor",
19576            &SqlReadOptions {
19577                columns: Some(vec!["val".to_owned()]),
19578                ..Default::default()
19579            },
19580            Some("id"),
19581        )
19582        .expect("indexed table reader honors options.columns");
19583        assert_eq!(frame.index().name(), Some("id"));
19584        assert_eq!(frame.column_names(), vec!["val"]);
19585        assert!(frame.column("id").is_none());
19586        assert!(frame.column("secret").is_none());
19587
19588        // Indexed chunked table reader.
19589        let chunks: Vec<DataFrame> = read_sql_table_chunks_with_options_and_index_col(
19590            &conn,
19591            "t1777_table_cols_honor",
19592            &SqlReadOptions {
19593                columns: Some(vec!["val".to_owned()]),
19594                ..Default::default()
19595            },
19596            Some("id"),
19597            1,
19598        )
19599        .expect("indexed chunked table reader honors options.columns")
19600        .collect::<Result<Vec<_>, _>>()
19601        .expect("all chunks");
19602        assert_eq!(chunks.len(), 2);
19603        for c in &chunks {
19604            assert_eq!(c.column_names(), vec!["val"]);
19605            assert!(c.column("id").is_none());
19606            assert!(c.column("secret").is_none());
19607        }
19608    }
19609
19610    #[cfg(feature = "sql-sqlite")]
19611    #[test]
19612    fn sql_read_table_with_options_and_index_col_applies_options_before_indexing() {
19613        let conn = make_sql_test_conn();
19614        conn.execute_batch(
19615            "CREATE TABLE table_options_index (ts TEXT, amount TEXT, label TEXT);
19616             INSERT INTO table_options_index (ts, amount, label) VALUES
19617                ('2024-04-01', '$10.00', 'a'),
19618                ('2024-04-02 03:04:05', '20.50', 'b');",
19619        )
19620        .expect("create table_options_index table");
19621
19622        let frame = read_sql_table_with_options_and_index_col(
19623            &conn,
19624            "table_options_index",
19625            &SqlReadOptions {
19626                params: None,
19627                parse_dates: Some(vec!["ts".to_owned()]),
19628                coerce_float: true,
19629                dtype: None,
19630                schema: None,
19631                columns: None,
19632                index_col: None,
19633            },
19634            Some("ts"),
19635        )
19636        .expect("read table with options and index_col");
19637
19638        assert_eq!(frame.index().name(), Some("ts"));
19639        assert_eq!(
19640            frame.index().labels(),
19641            &[
19642                IndexLabel::Utf8("2024-04-01 00:00:00".to_owned()),
19643                IndexLabel::Utf8("2024-04-02 03:04:05".to_owned())
19644            ]
19645        );
19646        assert!(frame.column("ts").is_none());
19647        assert_eq!(
19648            frame.column("amount").unwrap().values(),
19649            &[Scalar::Float64(10.0), Scalar::Float64(20.5)]
19650        );
19651    }
19652
19653    #[cfg(feature = "sql-sqlite")]
19654    #[test]
19655    fn sql_read_table_with_options_and_index_col_none_keeps_options_and_range_index() {
19656        let conn = make_sql_test_conn();
19657        conn.execute_batch(
19658            "CREATE TABLE table_options_no_index (id INTEGER, amount TEXT);
19659             INSERT INTO table_options_no_index (id, amount) VALUES
19660                (1, '$1.25'),
19661                (2, '$2.50');",
19662        )
19663        .expect("create table_options_no_index table");
19664
19665        let frame = read_sql_table_with_options_and_index_col(
19666            &conn,
19667            "table_options_no_index",
19668            &SqlReadOptions {
19669                params: None,
19670                parse_dates: None,
19671                coerce_float: true,
19672                dtype: None,
19673                schema: None,
19674                columns: None,
19675                index_col: None,
19676            },
19677            None,
19678        )
19679        .expect("read table with options and no index_col");
19680
19681        assert_eq!(
19682            frame.index().labels(),
19683            &[IndexLabel::Int64(0), IndexLabel::Int64(1)]
19684        );
19685        assert_eq!(frame.column_names(), vec!["id", "amount"]);
19686        assert_eq!(
19687            frame.column("amount").unwrap().values(),
19688            &[Scalar::Float64(1.25), Scalar::Float64(2.5)]
19689        );
19690    }
19691
19692    #[cfg(feature = "sql-sqlite")]
19693    #[test]
19694    fn sql_read_table_chunks_with_options_and_index_col_promotes_each_chunk_index() {
19695        let conn = make_sql_test_conn();
19696        conn.execute_batch(
19697            "CREATE TABLE table_options_index_chunked (ts TEXT, amount TEXT);
19698             INSERT INTO table_options_index_chunked (ts, amount) VALUES
19699                ('2024-05-01', '$10.00'),
19700                ('2024-05-02', '$20.00'),
19701                ('2024-05-03', '$30.50');",
19702        )
19703        .expect("create table_options_index_chunked table");
19704
19705        let chunks = read_sql_table_chunks_with_options_and_index_col(
19706            &conn,
19707            "table_options_index_chunked",
19708            &SqlReadOptions {
19709                params: None,
19710                parse_dates: Some(vec!["ts".to_owned()]),
19711                coerce_float: true,
19712                dtype: None,
19713                schema: None,
19714                columns: None,
19715                index_col: None,
19716            },
19717            Some("ts"),
19718            2,
19719        )
19720        .expect("table indexed option chunk iterator")
19721        .collect::<Result<Vec<_>, _>>()
19722        .expect("all chunks");
19723
19724        assert_eq!(chunks.len(), 2);
19725        assert_eq!(chunks[0].index().name(), Some("ts"));
19726        assert_eq!(
19727            chunks[0].index().labels(),
19728            &[
19729                IndexLabel::Utf8("2024-05-01 00:00:00".to_owned()),
19730                IndexLabel::Utf8("2024-05-02 00:00:00".to_owned())
19731            ]
19732        );
19733        assert!(chunks[0].column("ts").is_none());
19734        assert_eq!(
19735            chunks[0].column("amount").unwrap().values(),
19736            &[Scalar::Float64(10.0), Scalar::Float64(20.0)]
19737        );
19738        assert_eq!(
19739            chunks[1].index().labels(),
19740            &[IndexLabel::Utf8("2024-05-03 00:00:00".to_owned())]
19741        );
19742        assert_eq!(
19743            chunks[1].column("amount").unwrap().values(),
19744            &[Scalar::Float64(30.5)]
19745        );
19746    }
19747
19748    #[cfg(feature = "sql-sqlite")]
19749    #[test]
19750    fn sql_read_table_chunks_with_options_and_index_col_uses_options_index_when_explicit_none() {
19751        let conn = make_sql_test_conn();
19752        conn.execute_batch(
19753            "CREATE TABLE table_options_struct_index (id INTEGER, amount TEXT);
19754             INSERT INTO table_options_struct_index (id, amount) VALUES
19755                (10, '$10.00'),
19756                (20, '$20.00'),
19757                (30, '$30.50');",
19758        )
19759        .expect("create table_options_struct_index table");
19760
19761        let chunks = read_sql_table_chunks_with_options_and_index_col(
19762            &conn,
19763            "table_options_struct_index",
19764            &SqlReadOptions {
19765                params: None,
19766                parse_dates: None,
19767                coerce_float: true,
19768                dtype: None,
19769                schema: None,
19770                columns: None,
19771                index_col: Some("id".to_owned()),
19772            },
19773            None,
19774            2,
19775        )
19776        .expect("table indexed option chunk iterator")
19777        .collect::<Result<Vec<_>, _>>()
19778        .expect("all chunks");
19779
19780        assert_eq!(chunks.len(), 2);
19781        assert_eq!(
19782            chunks[0].index().labels(),
19783            &[IndexLabel::Int64(10), IndexLabel::Int64(20)]
19784        );
19785        assert!(chunks[0].column("id").is_none());
19786        assert_eq!(chunks[1].index().labels(), &[IndexLabel::Int64(30)]);
19787        assert_eq!(
19788            chunks[1].column("amount").unwrap().values(),
19789            &[Scalar::Float64(30.5)]
19790        );
19791    }
19792
19793    #[cfg(feature = "sql-sqlite")]
19794    #[test]
19795    fn sql_read_table_chunks_with_options_and_index_col_missing_column_errors() {
19796        let conn = make_sql_test_conn();
19797        conn.execute_batch(
19798            "CREATE TABLE table_options_missing_index (id INTEGER, amount TEXT);
19799             INSERT INTO table_options_missing_index (id, amount) VALUES (1, '$10.00');",
19800        )
19801        .expect("create table_options_missing_index table");
19802
19803        let err = read_sql_table_chunks_with_options_and_index_col(
19804            &conn,
19805            "table_options_missing_index",
19806            &SqlReadOptions {
19807                params: None,
19808                parse_dates: None,
19809                coerce_float: true,
19810                dtype: None,
19811                schema: None,
19812                columns: None,
19813                index_col: None,
19814            },
19815            Some("missing"),
19816            1,
19817        )
19818        .expect_err("missing index_col should error during iterator construction");
19819
19820        assert!(matches!(err, IoError::Sql(msg) if msg.contains("index_col")));
19821    }
19822
19823    #[cfg(feature = "sql-sqlite")]
19824    #[test]
19825    fn sql_read_with_parse_dates_coerces_named_columns() {
19826        let conn = make_sql_test_conn();
19827        conn.execute_batch(
19828            "CREATE TABLE events (ts TEXT, value INTEGER);
19829             INSERT INTO events (ts, value) VALUES
19830                ('2024-01-15', 1),
19831                ('2024-02-01 05:06:07', 2);",
19832        )
19833        .expect("create events table");
19834
19835        let frame = read_sql_with_options(
19836            &conn,
19837            "SELECT ts, value FROM events ORDER BY value",
19838            &SqlReadOptions {
19839                params: None,
19840                parse_dates: Some(vec!["ts".to_owned()]),
19841                coerce_float: false,
19842                dtype: None,
19843                schema: None,
19844                columns: None,
19845                index_col: None,
19846            },
19847        )
19848        .expect("read sql with parse_dates");
19849
19850        assert_eq!(
19851            frame.column("ts").unwrap().values()[0],
19852            Scalar::Utf8("2024-01-15 00:00:00".into())
19853        );
19854        assert_eq!(
19855            frame.column("ts").unwrap().values()[1],
19856            Scalar::Utf8("2024-02-01 05:06:07".into())
19857        );
19858        assert_eq!(frame.column("value").unwrap().values()[0], Scalar::Int64(1));
19859        assert_eq!(frame.column("value").unwrap().values()[1], Scalar::Int64(2));
19860    }
19861
19862    #[cfg(feature = "sql-sqlite")]
19863    #[test]
19864    fn sql_read_with_parse_dates_missing_column_errors() {
19865        let conn = make_sql_test_conn();
19866        conn.execute_batch(
19867            "CREATE TABLE metrics (value INTEGER);
19868             INSERT INTO metrics (value) VALUES (1);",
19869        )
19870        .expect("create metrics table");
19871
19872        let err = read_sql_with_options(
19873            &conn,
19874            "SELECT value FROM metrics",
19875            &SqlReadOptions {
19876                params: None,
19877                parse_dates: Some(vec!["ts".to_owned()]),
19878                coerce_float: false,
19879                dtype: None,
19880                schema: None,
19881                columns: None,
19882                index_col: None,
19883            },
19884        )
19885        .expect_err("missing parse_dates column should error");
19886
19887        assert!(
19888            matches!(err, IoError::MissingParseDateColumns(missing) if missing == vec!["ts".to_owned()])
19889        );
19890    }
19891
19892    #[cfg(feature = "sql-sqlite")]
19893    #[test]
19894    fn sql_read_with_params_binds_positional_placeholders() {
19895        let frame = make_test_dataframe();
19896        let conn = make_sql_test_conn();
19897        write_sql(&frame, &conn, "data", SqlIfExists::Fail).unwrap();
19898
19899        let filtered = read_sql_with_options(
19900            &conn,
19901            "SELECT ints, names FROM data WHERE ints > ? AND names != ? ORDER BY ints",
19902            &SqlReadOptions {
19903                params: Some(vec![Scalar::Int64(15), Scalar::Utf8("bob".to_owned())]),
19904                parse_dates: None,
19905                coerce_float: false,
19906                dtype: None,
19907                schema: None,
19908                columns: None,
19909                index_col: None,
19910            },
19911        )
19912        .expect("read sql with params");
19913
19914        assert_eq!(filtered.index().len(), 1);
19915        assert_eq!(
19916            filtered.column("ints").unwrap().values(),
19917            &[Scalar::Int64(30)]
19918        );
19919        assert_eq!(
19920            filtered.column("names").unwrap().values(),
19921            &[Scalar::Utf8("carol".into())]
19922        );
19923    }
19924
19925    #[cfg(feature = "sql-sqlite")]
19926    #[test]
19927    fn sql_read_with_params_wrong_arity_errors() {
19928        let frame = make_test_dataframe();
19929        let conn = make_sql_test_conn();
19930        write_sql(&frame, &conn, "data", SqlIfExists::Fail).unwrap();
19931
19932        let err = read_sql_with_options(
19933            &conn,
19934            "SELECT ints FROM data WHERE ints > ? AND names != ?",
19935            &SqlReadOptions {
19936                params: Some(vec![Scalar::Int64(15)]),
19937                parse_dates: None,
19938                coerce_float: false,
19939                dtype: None,
19940                schema: None,
19941                columns: None,
19942                index_col: None,
19943            },
19944        )
19945        .expect_err("wrong arity should error");
19946
19947        assert!(matches!(err, IoError::Sql(msg) if msg.contains("parameter")));
19948    }
19949
19950    #[cfg(feature = "sql-sqlite")]
19951    #[test]
19952    fn sql_read_coerce_float_promotes_decimal_like_text_columns() {
19953        let conn = make_sql_test_conn();
19954        conn.execute_batch(
19955            "CREATE TABLE payments (id INTEGER, amount TEXT, fee TEXT);
19956             INSERT INTO payments (id, amount, fee) VALUES
19957                (1, '12.50', '$1,234.50'),
19958                (2, '-3.25', NULL);",
19959        )
19960        .expect("create payments table");
19961
19962        // Per fd90.41: pandas default for coerce_float is True, and our
19963        // SqlReadOptions::default() now matches. So the bare read_sql
19964        // path coerces decimal-like text columns to Float64 by default.
19965        let default_frame =
19966            read_sql(&conn, "SELECT amount FROM payments ORDER BY id").expect("default read");
19967        assert_eq!(
19968            default_frame.column("amount").unwrap().dtype(),
19969            DType::Float64
19970        );
19971        assert_eq!(
19972            default_frame.column("amount").unwrap().values(),
19973            &[Scalar::Float64(12.5), Scalar::Float64(-3.25)],
19974        );
19975
19976        // Explicitly opting out of coerce_float keeps the raw Utf8.
19977        let no_coerce = read_sql_with_options(
19978            &conn,
19979            "SELECT amount FROM payments ORDER BY id",
19980            &SqlReadOptions {
19981                coerce_float: false,
19982                ..SqlReadOptions::default()
19983            },
19984        )
19985        .expect("read without coerce_float");
19986        assert_eq!(
19987            no_coerce.column("amount").unwrap().values(),
19988            &[
19989                Scalar::Utf8("12.50".to_owned()),
19990                Scalar::Utf8("-3.25".to_owned()),
19991            ],
19992        );
19993
19994        let coerced = read_sql_with_options(
19995            &conn,
19996            "SELECT amount, fee FROM payments ORDER BY id",
19997            &SqlReadOptions {
19998                coerce_float: true,
19999                ..SqlReadOptions::default()
20000            },
20001        )
20002        .expect("read with coerce_float");
20003
20004        let amount = coerced.column("amount").expect("amount");
20005        assert_eq!(amount.dtype(), DType::Float64);
20006        assert_eq!(
20007            amount.values(),
20008            &[Scalar::Float64(12.5), Scalar::Float64(-3.25)],
20009        );
20010
20011        let fee = coerced.column("fee").expect("fee");
20012        assert_eq!(fee.dtype(), DType::Float64);
20013        assert_eq!(fee.values()[0], Scalar::Float64(1234.5));
20014        assert!(matches!(fee.values()[1], Scalar::Null(NullKind::NaN)));
20015    }
20016
20017    #[cfg(feature = "sql-sqlite")]
20018    #[test]
20019    fn sql_read_coerce_float_leaves_non_numeric_text_columns_unchanged() {
20020        let conn = make_sql_test_conn();
20021        conn.execute_batch(
20022            "CREATE TABLE mixed (id INTEGER, maybe_amount TEXT, label TEXT);
20023             INSERT INTO mixed (id, maybe_amount, label) VALUES
20024                (1, '12.50', 'alpha'),
20025                (2, 'not numeric', '20.0');",
20026        )
20027        .expect("create mixed table");
20028
20029        let frame = read_sql_with_options(
20030            &conn,
20031            "SELECT maybe_amount, label FROM mixed ORDER BY id",
20032            &SqlReadOptions {
20033                coerce_float: true,
20034                dtype: None,
20035                schema: None,
20036                columns: None,
20037                index_col: None,
20038                ..SqlReadOptions::default()
20039            },
20040        )
20041        .expect("read with coerce_float");
20042
20043        assert_eq!(
20044            frame.column("maybe_amount").unwrap().values(),
20045            &[
20046                Scalar::Utf8("12.50".to_owned()),
20047                Scalar::Utf8("not numeric".to_owned()),
20048            ],
20049        );
20050        assert_eq!(
20051            frame.column("label").unwrap().values(),
20052            &[
20053                Scalar::Utf8("alpha".to_owned()),
20054                Scalar::Utf8("20.0".to_owned()),
20055            ],
20056        );
20057    }
20058
20059    #[cfg(feature = "sql-sqlite")]
20060    #[test]
20061    fn sql_read_chunks_batches_rows_and_resets_index_per_chunk() {
20062        let conn = make_sql_test_conn();
20063        conn.execute_batch(
20064            "CREATE TABLE chunked (id INTEGER, name TEXT);
20065             INSERT INTO chunked (id, name) VALUES
20066                (1, 'alpha'),
20067                (2, 'beta'),
20068                (3, 'gamma'),
20069                (4, 'delta'),
20070                (5, 'epsilon');",
20071        )
20072        .expect("create chunked table");
20073
20074        let chunks = read_sql_chunks(&conn, "SELECT id, name FROM chunked ORDER BY id", 2)
20075            .expect("chunk iterator")
20076            .collect::<Result<Vec<_>, _>>()
20077            .expect("all chunks");
20078
20079        assert_eq!(chunks.len(), 3);
20080        assert_eq!(
20081            chunks[0].index().labels(),
20082            &[IndexLabel::Int64(0), IndexLabel::Int64(1)]
20083        );
20084        assert_eq!(
20085            chunks[1].index().labels(),
20086            &[IndexLabel::Int64(0), IndexLabel::Int64(1)]
20087        );
20088        assert_eq!(chunks[2].index().labels(), &[IndexLabel::Int64(0)]);
20089        assert_eq!(
20090            chunks[0].column("id").unwrap().values(),
20091            &[Scalar::Int64(1), Scalar::Int64(2)]
20092        );
20093        assert_eq!(
20094            chunks[1].column("id").unwrap().values(),
20095            &[Scalar::Int64(3), Scalar::Int64(4)]
20096        );
20097        assert_eq!(
20098            chunks[2].column("name").unwrap().values(),
20099            &[Scalar::Utf8("epsilon".to_owned())]
20100        );
20101    }
20102
20103    #[cfg(feature = "sql-sqlite")]
20104    #[test]
20105    fn sql_read_chunks_sqlite_uses_paged_iterator_state() {
20106        let conn = make_sql_test_conn();
20107        conn.execute_batch(
20108            "CREATE TABLE fp_sqlite_paged_chunks (id INTEGER, name TEXT);
20109             INSERT INTO fp_sqlite_paged_chunks (id, name) VALUES
20110                (1, 'alpha'),
20111                (2, 'beta');",
20112        )
20113        .expect("create sqlite_paged_chunks table");
20114
20115        let mut chunks = read_sql_chunks(
20116            &conn,
20117            "SELECT id, name FROM fp_sqlite_paged_chunks ORDER BY id",
20118            1,
20119        )
20120        .expect("chunk iterator");
20121
20122        let initial_debug = format!("{chunks:?}");
20123        assert!(
20124            initial_debug.contains("mode: \"paged\""),
20125            "SQLite chunk reads must use paged mode, got {initial_debug}"
20126        );
20127        assert!(initial_debug.contains("next_offset: 0"));
20128
20129        let first = chunks
20130            .next()
20131            .expect("first chunk")
20132            .expect("first chunk should read");
20133        assert_eq!(first.column("id").unwrap().values(), &[Scalar::Int64(1)]);
20134
20135        let after_first_debug = format!("{chunks:?}");
20136        assert!(after_first_debug.contains("next_offset: 1"));
20137    }
20138
20139    #[cfg(feature = "sql-sqlite")]
20140    #[test]
20141    fn sql_read_chunks_with_options_applies_params_parse_dates_and_coerce_float() {
20142        let conn = make_sql_test_conn();
20143        conn.execute_batch(
20144            "CREATE TABLE events (ts TEXT, amount TEXT, keep INTEGER);
20145             INSERT INTO events (ts, amount, keep) VALUES
20146                ('2024-01-15', '12.50', 0),
20147                ('2024-02-01 05:06:07', '$1,234.50', 1),
20148                ('2024-03-03', '-3.25', 1);",
20149        )
20150        .expect("create events table");
20151
20152        let chunks = read_sql_chunks_with_options(
20153            &conn,
20154            "SELECT ts, amount FROM events WHERE keep = ? ORDER BY ts",
20155            &SqlReadOptions {
20156                params: Some(vec![Scalar::Int64(1)]),
20157                parse_dates: Some(vec!["ts".to_owned()]),
20158                coerce_float: true,
20159                dtype: None,
20160                schema: None,
20161                columns: None,
20162                index_col: None,
20163            },
20164            1,
20165        )
20166        .expect("chunk iterator")
20167        .collect::<Result<Vec<_>, _>>()
20168        .expect("all chunks");
20169
20170        assert_eq!(chunks.len(), 2);
20171        assert_eq!(
20172            chunks[0].column("ts").unwrap().values(),
20173            &[Scalar::Utf8("2024-02-01 05:06:07".to_owned())]
20174        );
20175        assert_eq!(
20176            chunks[0].column("amount").unwrap().values(),
20177            &[Scalar::Float64(1234.5)]
20178        );
20179        assert_eq!(
20180            chunks[1].column("ts").unwrap().values(),
20181            &[Scalar::Utf8("2024-03-03 00:00:00".to_owned())]
20182        );
20183        assert_eq!(
20184            chunks[1].column("amount").unwrap().values(),
20185            &[Scalar::Float64(-3.25)]
20186        );
20187    }
20188
20189    #[cfg(feature = "sql-sqlite")]
20190    #[test]
20191    fn sql_read_chunks_rejects_zero_chunksize() {
20192        let conn = make_sql_test_conn();
20193
20194        let err =
20195            read_sql_chunks(&conn, "SELECT 1", 0).expect_err("zero chunksize should be rejected");
20196
20197        assert!(matches!(err, IoError::Sql(msg) if msg.contains("chunksize")));
20198    }
20199
20200    #[cfg(feature = "sql-sqlite")]
20201    #[test]
20202    fn sql_duplicate_column_names_error() {
20203        let conn = make_sql_test_conn();
20204        let err = read_sql(&conn, "SELECT 1 as dup, 2 as dup");
20205        assert!(matches!(err, Err(IoError::DuplicateColumnName(name)) if name == "dup"));
20206    }
20207
20208    #[cfg(feature = "sql-sqlite")]
20209    #[test]
20210    fn sql_if_exists_fail() {
20211        let frame = make_test_dataframe();
20212        let conn = make_sql_test_conn();
20213        write_sql(&frame, &conn, "tbl", SqlIfExists::Fail).unwrap();
20214
20215        let err = write_sql(&frame, &conn, "tbl", SqlIfExists::Fail);
20216        assert!(err.is_err());
20217        assert!(matches!(&err.unwrap_err(), IoError::Sql(msg) if msg.contains("already exists")),);
20218    }
20219
20220    #[cfg(feature = "sql-sqlite")]
20221    #[test]
20222    fn sql_if_exists_replace() {
20223        let frame = make_test_dataframe();
20224        let conn = make_sql_test_conn();
20225        write_sql(&frame, &conn, "tbl", SqlIfExists::Fail).unwrap();
20226
20227        // Replace should succeed and overwrite.
20228        write_sql(&frame, &conn, "tbl", SqlIfExists::Replace).unwrap();
20229        let frame2 = read_sql_table(&conn, "tbl").unwrap();
20230        assert_eq!(frame2.index().len(), 3);
20231    }
20232
20233    #[cfg(feature = "sql-sqlite")]
20234    #[test]
20235    fn sql_if_exists_append() {
20236        let frame = make_test_dataframe();
20237        let conn = make_sql_test_conn();
20238        write_sql(&frame, &conn, "tbl", SqlIfExists::Fail).unwrap();
20239
20240        // Append should add rows.
20241        write_sql(&frame, &conn, "tbl", SqlIfExists::Append).unwrap();
20242        let frame2 = read_sql_table(&conn, "tbl").unwrap();
20243        assert_eq!(frame2.index().len(), 6); // 3 + 3
20244    }
20245
20246    #[cfg(feature = "sql-sqlite")]
20247    #[test]
20248    fn sql_with_nulls() {
20249        use fp_types::DType;
20250
20251        let mut columns = BTreeMap::new();
20252        columns.insert(
20253            "vals".to_string(),
20254            Column::new(
20255                DType::Float64,
20256                vec![
20257                    Scalar::Float64(1.0),
20258                    Scalar::Null(NullKind::NaN),
20259                    Scalar::Float64(3.0),
20260                ],
20261            )
20262            .unwrap(),
20263        );
20264
20265        let labels = vec![
20266            IndexLabel::Int64(0),
20267            IndexLabel::Int64(1),
20268            IndexLabel::Int64(2),
20269        ];
20270        let frame =
20271            DataFrame::new_with_column_order(Index::new(labels), columns, vec!["vals".to_string()])
20272                .unwrap();
20273
20274        let conn = make_sql_test_conn();
20275        write_sql(&frame, &conn, "nulltest", SqlIfExists::Fail).unwrap();
20276        let frame2 = read_sql_table(&conn, "nulltest").unwrap();
20277
20278        assert_eq!(
20279            frame2.column("vals").unwrap().values()[0],
20280            Scalar::Float64(1.0)
20281        );
20282        assert!(frame2.column("vals").unwrap().values()[1].is_missing());
20283        assert_eq!(
20284            frame2.column("vals").unwrap().values()[2],
20285            Scalar::Float64(3.0)
20286        );
20287    }
20288
20289    #[cfg(feature = "sql-sqlite")]
20290    #[test]
20291    fn sql_bool_roundtrip() {
20292        use fp_types::DType;
20293
20294        let mut columns = BTreeMap::new();
20295        columns.insert(
20296            "flags".to_string(),
20297            Column::new(
20298                DType::Bool,
20299                vec![Scalar::Bool(true), Scalar::Bool(false), Scalar::Bool(true)],
20300            )
20301            .unwrap(),
20302        );
20303
20304        let labels = vec![
20305            IndexLabel::Int64(0),
20306            IndexLabel::Int64(1),
20307            IndexLabel::Int64(2),
20308        ];
20309        let frame = DataFrame::new_with_column_order(
20310            Index::new(labels),
20311            columns,
20312            vec!["flags".to_string()],
20313        )
20314        .unwrap();
20315
20316        let conn = make_sql_test_conn();
20317        write_sql(&frame, &conn, "booltest", SqlIfExists::Fail).unwrap();
20318        let frame2 = read_sql_table(&conn, "booltest").unwrap();
20319
20320        // Bools stored as INTEGER(0/1), read back as Int64.
20321        assert_eq!(
20322            frame2.column("flags").unwrap().values()[0],
20323            Scalar::Int64(1)
20324        );
20325        assert_eq!(
20326            frame2.column("flags").unwrap().values()[1],
20327            Scalar::Int64(0)
20328        );
20329    }
20330
20331    #[cfg(feature = "sql-sqlite")]
20332    #[test]
20333    fn sql_invalid_table_name_rejected() {
20334        let conn = make_sql_test_conn();
20335        let err = read_sql_table(&conn, "Robert'; DROP TABLE students; --");
20336        assert!(err.is_err());
20337        assert!(
20338            matches!(&err.unwrap_err(), IoError::Sql(msg) if msg.contains("invalid table name")),
20339        );
20340    }
20341
20342    #[cfg(feature = "sql-sqlite")]
20343    #[test]
20344    fn sql_empty_table_name_rejected() {
20345        let conn = make_sql_test_conn();
20346        let err = read_sql_table(&conn, "");
20347        assert!(err.is_err());
20348        assert!(
20349            matches!(&err.unwrap_err(), IoError::Sql(msg) if msg.contains("invalid table name")),
20350        );
20351
20352        let frame = make_test_dataframe();
20353        let err = write_sql(&frame, &conn, "", SqlIfExists::Fail);
20354        assert!(err.is_err());
20355    }
20356
20357    #[cfg(feature = "sql-sqlite")]
20358    #[test]
20359    fn sql_empty_result() {
20360        let conn = make_sql_test_conn();
20361        conn.execute_batch("CREATE TABLE empty (x INTEGER, y TEXT)")
20362            .unwrap();
20363        let frame = read_sql_table(&conn, "empty").unwrap();
20364        assert_eq!(frame.index().len(), 0);
20365        assert_eq!(frame.column_names().len(), 2);
20366        assert_eq!(frame.column("x").unwrap().dtype(), DType::Int64);
20367        assert_eq!(frame.column("y").unwrap().dtype(), DType::Utf8);
20368
20369        conn.execute_batch(
20370            "CREATE TABLE typed_nulls (i INTEGER, r REAL, t TEXT);
20371             INSERT INTO typed_nulls VALUES (NULL, NULL, NULL);",
20372        )
20373        .unwrap();
20374        let null_frame = read_sql_table(&conn, "typed_nulls").unwrap();
20375        assert_eq!(null_frame.index().len(), 1);
20376        assert_eq!(null_frame.column("i").unwrap().dtype(), DType::Int64);
20377        assert_eq!(null_frame.column("r").unwrap().dtype(), DType::Float64);
20378        assert_eq!(null_frame.column("t").unwrap().dtype(), DType::Utf8);
20379    }
20380
20381    #[cfg(feature = "sql-sqlite")]
20382    #[test]
20383    fn sql_empty_filtered_query_preserves_declared_dtypes() {
20384        let conn = make_sql_test_conn();
20385        conn.execute_batch(
20386            "CREATE TABLE filtered_empty (i INTEGER, r REAL, t TEXT);
20387             INSERT INTO filtered_empty VALUES (1, 1.25, 'kept');",
20388        )
20389        .unwrap();
20390
20391        let frame = read_sql_with_options(
20392            &conn,
20393            "SELECT i, r, t FROM filtered_empty WHERE i > ?",
20394            &SqlReadOptions {
20395                params: Some(vec![Scalar::Int64(10)]),
20396                ..SqlReadOptions::default()
20397            },
20398        )
20399        .expect("empty filtered query must preserve cursor dtype hints");
20400
20401        assert_eq!(frame.index().len(), 0);
20402        assert_eq!(frame.column_names(), vec!["i", "r", "t"]);
20403        assert_eq!(frame.column("i").unwrap().dtype(), DType::Int64);
20404        assert_eq!(frame.column("r").unwrap().dtype(), DType::Float64);
20405        assert_eq!(frame.column("t").unwrap().dtype(), DType::Utf8);
20406    }
20407
20408    #[cfg(feature = "sql-sqlite")]
20409    #[test]
20410    fn sql_extension_trait() {
20411        let frame = make_test_dataframe();
20412        let conn = make_sql_test_conn();
20413
20414        // Use the extension trait method.
20415        use super::DataFrameIoExt;
20416        frame.to_sql(&conn, "ext_test", SqlIfExists::Fail).unwrap();
20417        frame
20418            .to_sql_with_options(
20419                &conn,
20420                "ext_test_options",
20421                &SqlWriteOptions {
20422                    if_exists: SqlIfExists::Fail,
20423                    index: false,
20424                    index_label: None,
20425                    schema: None,
20426                    dtype: None,
20427                    method: SqlInsertMethod::Single,
20428                    chunksize: None,
20429                },
20430            )
20431            .unwrap();
20432
20433        let frame2 = read_sql_table(&conn, "ext_test").unwrap();
20434        assert_eq!(frame2.index().len(), 3);
20435        let frame3 = read_sql_table(&conn, "ext_test_options").unwrap();
20436        assert_eq!(frame3.index().len(), 3);
20437    }
20438
20439    #[cfg(feature = "sql-sqlite")]
20440    #[test]
20441    fn series_sql_extension_aliases_roundtrip_to_single_column_table() {
20442        use super::SeriesIoExt;
20443
20444        let source = Series::from_values(
20445            "sales",
20446            vec!["r1".into(), "r2".into()],
20447            vec![Scalar::Int64(10), Scalar::Int64(12)],
20448        )
20449        .expect("source series");
20450
20451        let conn = make_sql_test_conn();
20452        source
20453            .to_sql(&conn, "series_ext", SqlIfExists::Fail)
20454            .expect("series to_sql");
20455        let roundtrip = read_sql_table(&conn, "series_ext").expect("read series table");
20456        assert_eq!(roundtrip.column_names(), vec!["index", "sales"]);
20457        assert_eq!(
20458            roundtrip.column("index").expect("index column").values(),
20459            &[Scalar::Utf8("r1".into()), Scalar::Utf8("r2".into())]
20460        );
20461        assert_eq!(
20462            roundtrip.column("sales").expect("sales column").values(),
20463            source.values()
20464        );
20465
20466        source
20467            .to_sql_with_options(
20468                &conn,
20469                "series_ext_no_index",
20470                &SqlWriteOptions {
20471                    if_exists: SqlIfExists::Fail,
20472                    index: false,
20473                    index_label: None,
20474                    schema: None,
20475                    dtype: None,
20476                    method: SqlInsertMethod::Single,
20477                    chunksize: None,
20478                },
20479            )
20480            .expect("series to_sql index false");
20481        let no_index =
20482            read_sql_table(&conn, "series_ext_no_index").expect("read no-index series table");
20483        assert_eq!(no_index.column_names(), vec!["sales"]);
20484        assert_eq!(
20485            no_index.column("sales").expect("sales column").values(),
20486            source.values()
20487        );
20488    }
20489
20490    // ── Arrow IPC / Feather tests ────────────────────────────────────
20491
20492    #[test]
20493    fn feather_bytes_roundtrip() {
20494        let frame = make_test_dataframe();
20495        let bytes = super::write_feather_bytes(&frame).expect("write feather");
20496        assert!(!bytes.is_empty());
20497
20498        let frame2 = super::read_feather_bytes(&bytes).expect("read feather");
20499        assert_eq!(frame2.index().len(), 3);
20500
20501        // Check all column values survive round-trip exactly.
20502        let ints = frame2.column("ints").unwrap();
20503        assert_eq!(ints.values()[0], Scalar::Int64(10));
20504        assert_eq!(ints.values()[1], Scalar::Int64(20));
20505        assert_eq!(ints.values()[2], Scalar::Int64(30));
20506
20507        let floats = frame2.column("floats").unwrap();
20508        assert_eq!(floats.values()[0], Scalar::Float64(1.5));
20509        assert_eq!(floats.values()[2], Scalar::Float64(3.5));
20510
20511        let names = frame2.column("names").unwrap();
20512        assert_eq!(names.values()[0], Scalar::Utf8("alice".into()));
20513        assert_eq!(names.values()[2], Scalar::Utf8("carol".into()));
20514    }
20515
20516    #[test]
20517    fn feather_row_multiindex_roundtrip_restores_logical_row_axis() {
20518        let frame = make_row_multiindex_test_dataframe();
20519        let bytes = super::write_feather_bytes(&frame).expect("write feather");
20520        let roundtrip = super::read_feather_bytes(&bytes).expect("read feather");
20521
20522        assert!(roundtrip.equals(&frame));
20523        assert!(roundtrip.column("__index_level_0__").is_none());
20524        assert_eq!(
20525            roundtrip
20526                .row_multiindex()
20527                .expect("row multiindex should be restored")
20528                .get_level_values(1)
20529                .unwrap()
20530                .labels(),
20531            frame
20532                .row_multiindex()
20533                .expect("source row multiindex")
20534                .get_level_values(1)
20535                .unwrap()
20536                .labels()
20537        );
20538    }
20539
20540    #[test]
20541    fn feather_file_roundtrip() {
20542        let frame = make_test_dataframe();
20543        let dir = std::env::temp_dir();
20544        let path = dir.join("fp_io_test_feather_roundtrip.feather");
20545
20546        super::write_feather(&frame, &path).expect("write feather file");
20547        let frame2 = super::read_feather(&path).expect("read feather file");
20548        assert_eq!(frame2.index().len(), 3);
20549        assert_eq!(
20550            frame2.column("ints").unwrap().values()[0],
20551            Scalar::Int64(10)
20552        );
20553        std::fs::remove_file(&path).ok();
20554    }
20555
20556    #[test]
20557    fn ipc_stream_bytes_roundtrip() {
20558        let frame = make_test_dataframe();
20559        let bytes = super::write_ipc_stream_bytes(&frame).expect("write ipc stream");
20560        assert!(!bytes.is_empty());
20561
20562        let frame2 = super::read_ipc_stream_bytes(&bytes).expect("read ipc stream");
20563        assert_eq!(frame2.index().len(), 3);
20564        assert_eq!(
20565            frame2.column("ints").unwrap().values()[1],
20566            Scalar::Int64(20)
20567        );
20568        assert_eq!(
20569            frame2.column("names").unwrap().values()[1],
20570            Scalar::Utf8("bob".into())
20571        );
20572    }
20573
20574    #[test]
20575    fn ipc_stream_row_multiindex_roundtrip_restores_logical_row_axis() {
20576        let frame = make_row_multiindex_test_dataframe();
20577        let bytes = super::write_ipc_stream_bytes(&frame).expect("write ipc stream");
20578        let roundtrip = super::read_ipc_stream_bytes(&bytes).expect("read ipc stream");
20579
20580        assert!(roundtrip.equals(&frame));
20581        assert!(roundtrip.row_multiindex().is_some());
20582    }
20583
20584    #[test]
20585    fn feather_with_nulls() {
20586        use fp_types::DType;
20587
20588        let mut columns = BTreeMap::new();
20589        columns.insert(
20590            "vals".to_string(),
20591            Column::new(
20592                DType::Float64,
20593                vec![
20594                    Scalar::Float64(1.0),
20595                    Scalar::Null(NullKind::NaN),
20596                    Scalar::Float64(3.0),
20597                ],
20598            )
20599            .unwrap(),
20600        );
20601
20602        let labels = vec![
20603            IndexLabel::Int64(0),
20604            IndexLabel::Int64(1),
20605            IndexLabel::Int64(2),
20606        ];
20607        let frame =
20608            DataFrame::new_with_column_order(Index::new(labels), columns, vec!["vals".to_string()])
20609                .unwrap();
20610
20611        let bytes = super::write_feather_bytes(&frame).expect("write");
20612        let frame2 = super::read_feather_bytes(&bytes).expect("read");
20613
20614        assert_eq!(
20615            frame2.column("vals").unwrap().values()[0],
20616            Scalar::Float64(1.0)
20617        );
20618        assert!(frame2.column("vals").unwrap().values()[1].is_missing());
20619        assert_eq!(
20620            frame2.column("vals").unwrap().values()[2],
20621            Scalar::Float64(3.0)
20622        );
20623    }
20624
20625    #[test]
20626    fn feather_nullable_int_roundtrip_preserves_int_dtype() {
20627        use fp_types::DType;
20628
20629        let mut columns = BTreeMap::new();
20630        columns.insert(
20631            "vals".to_string(),
20632            Column::new(
20633                DType::Int64,
20634                vec![
20635                    Scalar::Int64(10),
20636                    Scalar::Null(NullKind::Null),
20637                    Scalar::Int64(30),
20638                ],
20639            )
20640            .unwrap(),
20641        );
20642
20643        let labels = vec![
20644            IndexLabel::Int64(0),
20645            IndexLabel::Int64(1),
20646            IndexLabel::Int64(2),
20647        ];
20648        let frame =
20649            DataFrame::new_with_column_order(Index::new(labels), columns, vec!["vals".to_string()])
20650                .unwrap();
20651
20652        let bytes = super::write_feather_bytes(&frame).expect("write");
20653        let frame2 = super::read_feather_bytes(&bytes).expect("read");
20654        let vals = frame2.column("vals").unwrap();
20655
20656        assert_eq!(vals.dtype(), DType::Int64);
20657        assert_eq!(vals.values()[0], Scalar::Int64(10));
20658        assert_eq!(vals.values()[1], Scalar::Null(NullKind::Null));
20659        assert_eq!(vals.values()[2], Scalar::Int64(30));
20660    }
20661
20662    #[test]
20663    fn series_arrow_array_nullable_int_roundtrip() {
20664        let series = Series::from_values(
20665            "vals",
20666            vec![
20667                IndexLabel::Utf8("r0".into()),
20668                IndexLabel::Utf8("r1".into()),
20669                IndexLabel::Utf8("r2".into()),
20670            ],
20671            vec![
20672                Scalar::Int64(10),
20673                Scalar::Null(NullKind::Null),
20674                Scalar::Int64(30),
20675            ],
20676        )
20677        .unwrap();
20678
20679        let (dt, arr) = super::series_to_arrow_array(&series).expect("arrow encode");
20680        assert_eq!(dt, ArrowDataType::Int64);
20681
20682        let typed = arr
20683            .as_any()
20684            .downcast_ref::<Int64Array>()
20685            .expect("int64 arrow array");
20686        assert_eq!(typed.value(0), 10);
20687        assert!(typed.is_null(1));
20688        assert_eq!(typed.value(2), 30);
20689
20690        let roundtrip = super::series_from_arrow_array(
20691            series.name(),
20692            series.index().labels().to_vec(),
20693            arr.as_ref(),
20694            &dt,
20695        )
20696        .expect("arrow decode");
20697
20698        assert_eq!(roundtrip.name(), "vals");
20699        assert_eq!(roundtrip.index().labels(), series.index().labels());
20700        assert_eq!(roundtrip.column().dtype(), DType::Int64);
20701        assert_eq!(roundtrip.values(), series.values());
20702    }
20703
20704    #[test]
20705    fn feather_bool_column() {
20706        use fp_types::DType;
20707
20708        let mut columns = BTreeMap::new();
20709        columns.insert(
20710            "flags".to_string(),
20711            Column::new(
20712                DType::Bool,
20713                vec![Scalar::Bool(true), Scalar::Bool(false), Scalar::Bool(true)],
20714            )
20715            .unwrap(),
20716        );
20717
20718        let labels = vec![
20719            IndexLabel::Int64(0),
20720            IndexLabel::Int64(1),
20721            IndexLabel::Int64(2),
20722        ];
20723        let frame = DataFrame::new_with_column_order(
20724            Index::new(labels),
20725            columns,
20726            vec!["flags".to_string()],
20727        )
20728        .unwrap();
20729
20730        let bytes = super::write_feather_bytes(&frame).expect("write");
20731        let frame2 = super::read_feather_bytes(&bytes).expect("read");
20732
20733        assert_eq!(
20734            frame2.column("flags").unwrap().values()[0],
20735            Scalar::Bool(true)
20736        );
20737        assert_eq!(
20738            frame2.column("flags").unwrap().values()[1],
20739            Scalar::Bool(false)
20740        );
20741    }
20742
20743    #[test]
20744    fn feather_preserves_column_order() {
20745        let frame = make_test_dataframe();
20746        let bytes = super::write_feather_bytes(&frame).expect("write");
20747        let frame2 = super::read_feather_bytes(&bytes).expect("read");
20748
20749        assert_eq!(
20750            frame2
20751                .column_names()
20752                .iter()
20753                .map(|s| s.as_str())
20754                .collect::<Vec<_>>(),
20755            frame
20756                .column_names()
20757                .iter()
20758                .map(|s| s.as_str())
20759                .collect::<Vec<_>>()
20760        );
20761    }
20762
20763    #[test]
20764    fn feather_extension_trait() {
20765        use super::DataFrameIoExt;
20766
20767        let frame = make_test_dataframe();
20768        let bytes = frame.to_feather_bytes().unwrap();
20769        let frame2 = super::read_feather_bytes(&bytes).unwrap();
20770        assert_eq!(frame2.index().len(), 3);
20771    }
20772
20773    // ── Adversarial parser tests (frankenpandas-yby) ─────────────────
20774
20775    // ── CsvReadOptions extended params tests (frankenpandas-qoz) ────
20776
20777    #[test]
20778    fn csv_nrows_limits_rows() {
20779        let input = "x\n1\n2\n3\n4\n5\n";
20780        let opts = CsvReadOptions {
20781            nrows: Some(3),
20782            ..Default::default()
20783        };
20784        let frame = read_csv_with_options(input, &opts).expect("parse");
20785        assert_eq!(frame.index().len(), 3);
20786        assert_eq!(frame.column("x").unwrap().values()[2], Scalar::Int64(3));
20787    }
20788
20789    #[test]
20790    fn csv_skiprows_skips_data_rows() {
20791        let input = "x\n1\n2\n3\n4\n5\n";
20792        let opts = CsvReadOptions {
20793            skiprows: 2,
20794            ..Default::default()
20795        };
20796        let frame = read_csv_with_options(input, &opts).expect("parse");
20797        assert_eq!(frame.index().len(), 3); // skipped header + first data row
20798        assert_eq!(frame.column("2").unwrap().values()[0], Scalar::Int64(3));
20799    }
20800
20801    #[test]
20802    fn csv_skiprows_and_nrows_combined() {
20803        let input = "x\n1\n2\n3\n4\n5\n";
20804        let opts = CsvReadOptions {
20805            skiprows: 1,
20806            nrows: Some(2),
20807            ..Default::default()
20808        };
20809        let frame = read_csv_with_options(input, &opts).expect("parse");
20810        assert_eq!(frame.index().len(), 2); // skipped header; read 2 data rows
20811        assert_eq!(frame.column("1").unwrap().values()[0], Scalar::Int64(2));
20812        assert_eq!(frame.column("1").unwrap().values()[1], Scalar::Int64(3));
20813    }
20814
20815    #[test]
20816    fn csv_usecols_selects_columns() {
20817        let input = "a,b,c\n1,2,3\n4,5,6\n";
20818        let opts = CsvReadOptions {
20819            usecols: Some(vec!["a".into(), "c".into()]),
20820            ..Default::default()
20821        };
20822        let frame = read_csv_with_options(input, &opts).expect("parse");
20823        assert_eq!(frame.column_names().len(), 2);
20824        assert!(frame.column("a").is_some());
20825        assert!(frame.column("b").is_none());
20826        assert!(frame.column("c").is_some());
20827        assert_eq!(frame.column("a").unwrap().values()[0], Scalar::Int64(1));
20828        assert_eq!(frame.column("c").unwrap().values()[1], Scalar::Int64(6));
20829    }
20830
20831    #[test]
20832    fn csv_usecols_nonexistent_column_errors() {
20833        let input = "a,b\n1,2\n";
20834        let opts = CsvReadOptions {
20835            usecols: Some(vec!["a".into(), "nonexistent".into()]),
20836            ..Default::default()
20837        };
20838        let err = read_csv_with_options(input, &opts).expect_err("missing usecols should error");
20839        assert!(matches!(err, IoError::MissingUsecols(_)));
20840    }
20841
20842    #[test]
20843    fn csv_dtype_coercion() {
20844        let input = "id,score\n1,95\n2,87\n";
20845        let mut dtype_map = std::collections::HashMap::new();
20846        dtype_map.insert("score".to_owned(), fp_types::DType::Float64);
20847        let opts = CsvReadOptions {
20848            dtype: Some(dtype_map),
20849            ..Default::default()
20850        };
20851        let frame = read_csv_with_options(input, &opts).expect("parse");
20852        // score column should be Float64, not Int64
20853        assert_eq!(
20854            frame.column("score").unwrap().values()[0],
20855            Scalar::Float64(95.0)
20856        );
20857        assert_eq!(
20858            frame.column("score").unwrap().values()[1],
20859            Scalar::Float64(87.0)
20860        );
20861        // id column should remain Int64 (not in dtype map)
20862        assert_eq!(frame.column("id").unwrap().values()[0], Scalar::Int64(1));
20863    }
20864
20865    #[test]
20866    fn csv_dtype_coercion_invalid_value_errors() {
20867        let input = "id,score\n1,abc\n";
20868        let mut dtype_map = std::collections::HashMap::new();
20869        dtype_map.insert("score".to_owned(), fp_types::DType::Int64);
20870        let opts = CsvReadOptions {
20871            dtype: Some(dtype_map),
20872            ..Default::default()
20873        };
20874        let err = read_csv_with_options(input, &opts).expect_err("invalid cast must error");
20875        assert!(matches!(
20876            err,
20877            IoError::Column(fp_columnar::ColumnError::Type(
20878                fp_types::TypeError::InvalidCast { .. }
20879            ))
20880        ));
20881    }
20882
20883    #[test]
20884    fn csv_skiprows_beyond_data_errors() {
20885        let input = "x\n1\n2\n";
20886        let opts = CsvReadOptions {
20887            skiprows: 100,
20888            ..Default::default()
20889        };
20890        let err = read_csv_with_options(input, &opts).expect_err("skiprows removes header");
20891        assert!(matches!(err, IoError::MissingHeaders));
20892    }
20893
20894    #[test]
20895    fn csv_nrows_zero_returns_empty() {
20896        let input = "x\n1\n2\n3\n";
20897        let opts = CsvReadOptions {
20898            nrows: Some(0),
20899            ..Default::default()
20900        };
20901        let frame = read_csv_with_options(input, &opts).expect("parse");
20902        assert_eq!(frame.index().len(), 0);
20903    }
20904
20905    #[test]
20906    fn csv_decimal_comma_parses_quoted_float_fields() {
20907        let input = "price\n\"1,50\"\n\"3,75\"\n";
20908        let opts = CsvReadOptions {
20909            decimal: b',',
20910            ..Default::default()
20911        };
20912        let frame = read_csv_with_options(input, &opts).expect("parse");
20913        assert_eq!(
20914            frame.column("price").unwrap().values(),
20915            &[Scalar::Float64(1.5), Scalar::Float64(3.75)]
20916        );
20917    }
20918
20919    #[test]
20920    fn csv_default_decimal_keeps_comma_decimal_strings_as_utf8() {
20921        let input = "price\n\"1,50\"\n";
20922        let frame = read_csv_with_options(input, &CsvReadOptions::default()).expect("parse");
20923        assert_eq!(
20924            frame.column("price").unwrap().values(),
20925            &[Scalar::Utf8("1,50".to_owned())]
20926        );
20927    }
20928
20929    #[test]
20930    fn csv_true_false_values_do_not_override_numeric_inference() {
20931        let input = "flag\n1\n0\n";
20932        let opts = CsvReadOptions {
20933            true_values: vec!["1".to_owned()],
20934            false_values: vec!["0".to_owned()],
20935            ..Default::default()
20936        };
20937        let frame = read_csv_with_options(input, &opts).expect("parse");
20938        assert_eq!(
20939            frame.column("flag").unwrap().values(),
20940            &[Scalar::Int64(1), Scalar::Int64(0)]
20941        );
20942    }
20943
20944    #[test]
20945    fn csv_true_false_values_convert_non_numeric_tokens() {
20946        let input = "flag\nyes\nno\n";
20947        let opts = CsvReadOptions {
20948            true_values: vec!["yes".to_owned()],
20949            false_values: vec!["no".to_owned()],
20950            ..Default::default()
20951        };
20952        let frame = read_csv_with_options(input, &opts).expect("parse");
20953        assert_eq!(
20954            frame.column("flag").unwrap().values(),
20955            &[Scalar::Bool(true), Scalar::Bool(false)]
20956        );
20957    }
20958
20959    #[test]
20960    fn csv_default_parsing_keeps_numeric_boolean_tokens_as_ints() {
20961        let input = "flag\n1\n0\n";
20962        let frame = read_csv_with_options(input, &CsvReadOptions::default()).expect("parse");
20963        assert_eq!(
20964            frame.column("flag").unwrap().values(),
20965            &[Scalar::Int64(1), Scalar::Int64(0)]
20966        );
20967    }
20968
20969    #[test]
20970    fn csv_missing_numeric_column_preserves_int() {
20971        // DISC-011: Nullable extension Int64 dtype parity - Int64 preserved, not promoted to Float64.
20972        let input = "a,b,c\n,NA,NaN\n1,,x\n";
20973        let frame = read_csv_with_options(input, &CsvReadOptions::default()).expect("parse");
20974        assert_eq!(
20975            frame.column("a").unwrap().values(),
20976            &[Scalar::Null(NullKind::Null), Scalar::Int64(1)]
20977        );
20978        assert!(frame.column("b").unwrap().values()[0].is_missing());
20979        assert!(frame.column("b").unwrap().values()[1].is_missing());
20980        assert_eq!(
20981            frame.column("c").unwrap().values(),
20982            &[Scalar::Null(NullKind::Null), Scalar::Utf8("x".to_owned())]
20983        );
20984    }
20985
20986    #[test]
20987    fn csv_parse_dates_mixed_naive_and_aware_strings_normalizes_per_value() {
20988        // pandas pd.read_csv(parse_dates=["ts"]) normalizes each value
20989        // independently when the column has mixed naive + aware timestamps:
20990        // the naive entry stays naive ("YYYY-MM-DD HH:MM:SS"), and the
20991        // aware entry is rewritten to the offset form ("...+00:00").
20992        // The previous "preserves object" behavior locked the entire
20993        // column to the first inferred timezone pattern and silently
20994        // rejected mismatched values; conformance fixture FP-P2D-429
20995        // documents the pandas-2.2.3 expectation.
20996        let input = "ts,value\n2024-01-15 10:30:00,1\n2024-01-15T10:30:00Z,2\n";
20997        let opts = CsvReadOptions {
20998            parse_dates: Some(vec!["ts".to_owned()]),
20999            ..Default::default()
21000        };
21001        let frame = read_csv_with_options(input, &opts).expect("parse");
21002        assert_eq!(
21003            frame.column("ts").unwrap().values(),
21004            &[
21005                Scalar::Utf8("2024-01-15 10:30:00".to_owned()),
21006                Scalar::Utf8("2024-01-15 10:30:00+00:00".to_owned()),
21007            ]
21008        );
21009        assert_eq!(
21010            frame.column("value").unwrap().values(),
21011            &[Scalar::Int64(1), Scalar::Int64(2)]
21012        );
21013    }
21014
21015    #[test]
21016    fn csv_parse_dates_combined_columns_replaces_source_columns() {
21017        let input = "date,time,value\n2024-01-15,10:30:00,1\n2024-01-16,11:45:30,2\n";
21018        let opts = CsvReadOptions {
21019            parse_date_combinations: Some(vec![vec!["date".to_owned(), "time".to_owned()]]),
21020            ..Default::default()
21021        };
21022        let frame = read_csv_with_options(input, &opts).expect("parse");
21023        assert_eq!(frame.column_names(), vec!["date_time", "value"]);
21024        assert_eq!(
21025            frame.column("date_time").unwrap().values(),
21026            &[
21027                Scalar::Utf8("2024-01-15 10:30:00".to_owned()),
21028                Scalar::Utf8("2024-01-16 11:45:30".to_owned()),
21029            ]
21030        );
21031        assert!(frame.column("date").is_none());
21032        assert!(frame.column("time").is_none());
21033        assert_eq!(
21034            frame.column("value").unwrap().values(),
21035            &[Scalar::Int64(1), Scalar::Int64(2)]
21036        );
21037    }
21038
21039    #[test]
21040    fn csv_parse_date_combinations_named_uses_caller_supplied_name() {
21041        let input = "date,time,value\n2024-01-15,10:30:00,1\n2024-01-16,11:45:30,2\n";
21042        let opts = CsvReadOptions {
21043            parse_date_combinations_named: Some(vec![(
21044                "timestamp".to_owned(),
21045                vec!["date".to_owned(), "time".to_owned()],
21046            )]),
21047            ..Default::default()
21048        };
21049        let frame = read_csv_with_options(input, &opts).expect("parse");
21050        // Dict-style rename: combined column named "timestamp" rather than
21051        // the default underscore-joined "date_time".
21052        assert_eq!(frame.column_names(), vec!["timestamp", "value"]);
21053        assert!(frame.column("date").is_none());
21054        assert!(frame.column("time").is_none());
21055        assert_eq!(
21056            frame.column("timestamp").unwrap().values(),
21057            &[
21058                Scalar::Utf8("2024-01-15 10:30:00".to_owned()),
21059                Scalar::Utf8("2024-01-16 11:45:30".to_owned()),
21060            ]
21061        );
21062    }
21063
21064    #[test]
21065    fn csv_parse_date_combinations_named_multiple_groups() {
21066        let input = "d1,t1,d2,t2,value\n2024-01-01,09:00:00,2024-01-01,17:00:00,10\n2024-02-01,09:00:00,2024-02-01,17:00:00,20\n";
21067        let opts = CsvReadOptions {
21068            parse_date_combinations_named: Some(vec![
21069                ("start".to_owned(), vec!["d1".to_owned(), "t1".to_owned()]),
21070                ("end".to_owned(), vec!["d2".to_owned(), "t2".to_owned()]),
21071            ]),
21072            ..Default::default()
21073        };
21074        let frame = read_csv_with_options(input, &opts).expect("parse");
21075        let names = frame.column_names();
21076        assert!(names.iter().any(|n| n.as_str() == "start"));
21077        assert!(names.iter().any(|n| n.as_str() == "end"));
21078        assert!(!names.iter().any(|n| n.as_str() == "d1"));
21079        assert!(!names.iter().any(|n| n.as_str() == "t2"));
21080        assert_eq!(
21081            frame.column("value").unwrap().values(),
21082            &[Scalar::Int64(10), Scalar::Int64(20)]
21083        );
21084        assert_eq!(
21085            frame.column("start").unwrap().values(),
21086            &[
21087                Scalar::Utf8("2024-01-01 09:00:00".to_owned()),
21088                Scalar::Utf8("2024-02-01 09:00:00".to_owned()),
21089            ]
21090        );
21091    }
21092
21093    #[test]
21094    fn csv_parse_date_combinations_named_rejects_duplicate_output_names() {
21095        let input = "a,b,c,d\n2024,01,2024,02\n";
21096        let opts = CsvReadOptions {
21097            parse_date_combinations_named: Some(vec![
21098                ("ts".to_owned(), vec!["a".to_owned(), "b".to_owned()]),
21099                ("ts".to_owned(), vec!["c".to_owned(), "d".to_owned()]),
21100            ]),
21101            ..Default::default()
21102        };
21103        let err = read_csv_with_options(input, &opts).unwrap_err();
21104        assert!(matches!(err, IoError::DuplicateColumnName(name) if name == "ts"));
21105    }
21106
21107    #[test]
21108    fn csv_parse_date_combinations_named_rejects_missing_source_column() {
21109        let input = "date,time,value\n2024-01-01,09:00:00,1\n";
21110        let opts = CsvReadOptions {
21111            parse_date_combinations_named: Some(vec![(
21112                "ts".to_owned(),
21113                vec!["date".to_owned(), "missing".to_owned()],
21114            )]),
21115            ..Default::default()
21116        };
21117        let err = read_csv_with_options(input, &opts).unwrap_err();
21118        assert!(matches!(err, IoError::MissingParseDateColumns(_)));
21119    }
21120
21121    #[test]
21122    fn csv_parse_date_combinations_named_empty_sources_skipped() {
21123        let input = "a,b\n1,2\n";
21124        let opts = CsvReadOptions {
21125            parse_date_combinations_named: Some(vec![("unused".to_owned(), Vec::new())]),
21126            ..Default::default()
21127        };
21128        let frame = read_csv_with_options(input, &opts).expect("parse");
21129        // Empty source list is a no-op; original columns remain.
21130        assert_eq!(frame.column_names(), vec!["a", "b"]);
21131    }
21132
21133    // ── JSONL tests (frankenpandas-sue) ──────────────────────────────
21134
21135    #[test]
21136    fn jsonl_write_read_roundtrip() {
21137        let frame = make_test_dataframe();
21138        let jsonl = super::write_jsonl_string(&frame).expect("JSONL write failed");
21139
21140        // Each line should be a valid JSON object.
21141        let line_count = jsonl.lines().count();
21142        assert_eq!(line_count, 3, "3 rows = 3 lines");
21143
21144        let back = super::read_jsonl_str(&jsonl).expect("JSONL read failed");
21145        assert_eq!(back.index().len(), 3);
21146        assert_eq!(back.column("ints").unwrap().values()[0], Scalar::Int64(10));
21147        assert_eq!(
21148            back.column("names").unwrap().values()[2],
21149            Scalar::Utf8("carol".into())
21150        );
21151    }
21152
21153    #[test]
21154    fn jsonl_preserves_column_order() {
21155        let input = r#"
21156{"b":1,"a":2}
21157{"c":3}
21158"#;
21159        let frame = super::read_jsonl_str(input).expect("JSONL read failed");
21160        let order: Vec<&str> = frame
21161            .column_names()
21162            .iter()
21163            .map(|name| name.as_str())
21164            .collect();
21165        assert_eq!(order, vec!["b", "a", "c"]);
21166    }
21167
21168    #[test]
21169    fn jsonl_each_line_is_valid_json() {
21170        let frame = make_test_dataframe();
21171        let jsonl = super::write_jsonl_string(&frame).unwrap();
21172
21173        for (i, line) in jsonl.lines().enumerate() {
21174            let parsed: serde_json::Value =
21175                serde_json::from_str(line).expect("jsonl line must be valid JSON");
21176            assert!(parsed.is_object(), "line {i} must be a JSON object");
21177        }
21178    }
21179
21180    #[test]
21181    fn jsonl_with_nulls() {
21182        use fp_types::DType;
21183
21184        let mut columns = BTreeMap::new();
21185        columns.insert(
21186            "v".to_string(),
21187            Column::new(
21188                DType::Float64,
21189                vec![
21190                    Scalar::Float64(1.0),
21191                    Scalar::Null(NullKind::NaN),
21192                    Scalar::Float64(3.0),
21193                ],
21194            )
21195            .unwrap(),
21196        );
21197        let labels = vec![
21198            IndexLabel::Int64(0),
21199            IndexLabel::Int64(1),
21200            IndexLabel::Int64(2),
21201        ];
21202        let frame =
21203            DataFrame::new_with_column_order(Index::new(labels), columns, vec!["v".to_string()])
21204                .unwrap();
21205
21206        let jsonl = super::write_jsonl_string(&frame).unwrap();
21207        let back = super::read_jsonl_str(&jsonl).unwrap();
21208        assert!(back.column("v").unwrap().values()[1].is_missing());
21209    }
21210
21211    #[test]
21212    fn jsonl_records_write_preserves_nullable_int_column() {
21213        // DISC-011: Nullable extension Int64 dtype parity - Int64 preserved, not promoted to Float64.
21214        let frame = DataFrame::from_dict(
21215            &["a"],
21216            vec![("a", vec![Scalar::Int64(1), Scalar::Null(NullKind::Null)])],
21217        )
21218        .unwrap();
21219
21220        let jsonl = super::write_jsonl_string(&frame).expect("write jsonl");
21221        let rows = jsonl
21222            .lines()
21223            .map(|line| serde_json::from_str::<serde_json::Value>(line).unwrap())
21224            .collect::<Vec<_>>();
21225
21226        assert_eq!(
21227            rows,
21228            vec![serde_json::json!({"a": 1}), serde_json::json!({"a": null})]
21229        );
21230    }
21231
21232    #[test]
21233    fn jsonl_empty_input() {
21234        let back = super::read_jsonl_str("").expect("empty JSONL must parse");
21235        assert_eq!(back.index().len(), 0);
21236    }
21237
21238    #[test]
21239    fn jsonl_blank_lines_skipped() {
21240        let input = "{\"a\":1}\n\n{\"a\":2}\n\n";
21241        let back = super::read_jsonl_str(input).expect("JSONL with blanks must parse");
21242        assert_eq!(back.index().len(), 2);
21243    }
21244
21245    #[test]
21246    fn jsonl_non_object_line_errors() {
21247        let input = "{\"a\":1}\n[1,2,3]\n";
21248        let err = super::read_jsonl_str(input);
21249        assert!(err.is_err());
21250    }
21251
21252    #[test]
21253    fn jsonl_different_keys_across_rows() {
21254        // Rows with different keys should produce union of all columns.
21255        let input = "{\"a\":1,\"b\":2}\n{\"a\":3,\"c\":4}\n";
21256        let frame = super::read_jsonl_str(input).expect("JSONL with different keys must parse");
21257        assert_eq!(frame.index().len(), 2);
21258        // Should have columns a, b, c (union of all keys).
21259        assert!(frame.column("a").is_some(), "column a must exist");
21260        assert!(frame.column("b").is_some(), "column b must exist");
21261        assert!(frame.column("c").is_some(), "column c must exist");
21262        // Row 0: a=1, b=2, c=null.
21263        assert_eq!(frame.column("a").unwrap().values()[0], Scalar::Int64(1));
21264        assert_eq!(frame.column("b").unwrap().values()[0], Scalar::Float64(2.0));
21265        assert!(frame.column("c").unwrap().values()[0].is_missing());
21266        // Row 1: a=3, b=null, c=4.
21267        assert_eq!(frame.column("a").unwrap().values()[1], Scalar::Int64(3));
21268        assert!(frame.column("b").unwrap().values()[1].is_missing());
21269        assert_eq!(frame.column("c").unwrap().values()[1], Scalar::Float64(4.0));
21270    }
21271
21272    #[test]
21273    fn adversarial_csv_very_long_field() {
21274        // A single field with >100K characters should parse without panic.
21275        let long_val = "x".repeat(200_000);
21276        let input = format!("col\n{long_val}\n");
21277        let frame = read_csv_str(&input).expect("long field must parse");
21278        assert_eq!(frame.index().len(), 1);
21279        match &frame.column("col").unwrap().values()[0] {
21280            Scalar::Utf8(s) => assert_eq!(s.len(), 200_000),
21281            other => assert!(
21282                matches!(other, Scalar::Utf8(_)),
21283                "expected Utf8 for long field"
21284            ),
21285        }
21286    }
21287
21288    #[test]
21289    fn adversarial_csv_many_columns() {
21290        // CSV with 1000 columns should parse correctly.
21291        let ncols = 1000;
21292        let headers: Vec<String> = (0..ncols).map(|i| format!("c{i}")).collect();
21293        let mut csv = headers.join(",");
21294        csv.push('\n');
21295        let vals: Vec<String> = (0..ncols).map(|i| i.to_string()).collect();
21296        csv.push_str(&vals.join(","));
21297        csv.push('\n');
21298
21299        let frame = read_csv_str(&csv).expect("1000-column CSV must parse");
21300        assert_eq!(frame.columns().len(), ncols);
21301        assert_eq!(frame.index().len(), 1);
21302    }
21303
21304    #[test]
21305    fn adversarial_csv_empty_rows() {
21306        // CSV with empty rows between data should handle gracefully.
21307        // The csv crate skips truly empty records, but rows with the right
21308        // number of empty fields produce null values.
21309        let input = "a,b\n1,2\n,\n3,4\n";
21310        let frame = read_csv_str(input).expect("parse");
21311        assert_eq!(frame.index().len(), 3);
21312        // Row 1 (index 1) has empty fields → null
21313        assert!(frame.column("a").unwrap().values()[1].is_missing());
21314    }
21315
21316    #[test]
21317    fn adversarial_csv_field_with_newlines_in_quotes() {
21318        // Embedded newlines in quoted fields must not break row boundaries.
21319        let input = "msg\n\"line1\nline2\nline3\"\n\"single\"\n";
21320        let frame = read_csv_str(input).expect("quoted newlines must parse");
21321        assert_eq!(frame.index().len(), 2);
21322    }
21323
21324    #[test]
21325    fn adversarial_csv_header_only_no_data() {
21326        let input = "x,y,z\n";
21327        let frame = read_csv_str(input).expect("header-only must parse");
21328        assert_eq!(frame.index().len(), 0);
21329        assert_eq!(frame.columns().len(), 3);
21330    }
21331
21332    #[test]
21333    fn adversarial_json_deeply_nested_values() {
21334        // JSON with nested objects as values should store them as strings.
21335        let input = r#"[{"a":1,"b":{"nested":"value"}}]"#;
21336        let frame = read_json_str(input, JsonOrient::Records).expect("nested JSON must parse");
21337        assert_eq!(frame.index().len(), 1);
21338        // The nested object becomes a Utf8 representation.
21339        let b_val = &frame.column("b").unwrap().values()[0];
21340        assert!(matches!(b_val, Scalar::Utf8(_)));
21341    }
21342
21343    #[test]
21344    fn adversarial_json_i64_max_value() {
21345        // JSON with values at i64 boundary.
21346        let input = format!(r#"[{{"v":{}}}]"#, i64::MAX);
21347        let frame = read_json_str(&input, JsonOrient::Records).expect("i64::MAX must parse");
21348        assert_eq!(
21349            frame.column("v").unwrap().values()[0],
21350            Scalar::Int64(i64::MAX)
21351        );
21352    }
21353
21354    #[test]
21355    fn adversarial_json_i64_min_value() {
21356        let input = format!(r#"[{{"v":{}}}]"#, i64::MIN);
21357        let frame = read_json_str(&input, JsonOrient::Records).expect("i64::MIN must parse");
21358        assert_eq!(
21359            frame.column("v").unwrap().values()[0],
21360            Scalar::Int64(i64::MIN)
21361        );
21362    }
21363
21364    #[test]
21365    fn adversarial_json_float_special_values() {
21366        // JSON doesn't natively support Infinity/NaN, but we should handle
21367        // null gracefully.
21368        let input = r#"[{"v":null},{"v":1.7976931348623157e+308}]"#;
21369        let frame = read_json_str(input, JsonOrient::Records).expect("special floats must parse");
21370        assert!(frame.column("v").unwrap().values()[0].is_missing());
21371        // f64::MAX is approximately 1.7976931348623157e+308
21372        if let Scalar::Float64(v) = frame.column("v").unwrap().values()[1] {
21373            assert!(v.is_finite());
21374        }
21375    }
21376
21377    #[test]
21378    fn adversarial_json_empty_records_array() {
21379        let input = r#"[]"#;
21380        let frame = read_json_str(input, JsonOrient::Records).expect("empty array must parse");
21381        assert_eq!(frame.index().len(), 0);
21382    }
21383
21384    #[test]
21385    fn adversarial_json_empty_columns_object() {
21386        let input = r#"{}"#;
21387        let frame = read_json_str(input, JsonOrient::Columns).expect("empty object must parse");
21388        assert_eq!(frame.index().len(), 0);
21389        assert_eq!(frame.columns().len(), 0);
21390    }
21391
21392    #[test]
21393    fn adversarial_csv_unicode_values() {
21394        // CSV with multi-byte UTF-8 characters in values.
21395        let input = "name,emoji\n日本語,🎉\nрусский,🚀\n";
21396        let frame = read_csv_str(input).expect("unicode CSV must parse");
21397        assert_eq!(frame.index().len(), 2);
21398        assert_eq!(
21399            frame.column("name").unwrap().values()[0],
21400            Scalar::Utf8("日本語".into())
21401        );
21402        assert_eq!(
21403            frame.column("emoji").unwrap().values()[1],
21404            Scalar::Utf8("🚀".into())
21405        );
21406    }
21407
21408    #[test]
21409    fn adversarial_csv_single_column_no_trailing_newline() {
21410        let input = "val\n42";
21411        let frame = read_csv_str(input).expect("no trailing newline must parse");
21412        assert_eq!(frame.index().len(), 1);
21413        assert_eq!(frame.column("val").unwrap().values()[0], Scalar::Int64(42));
21414    }
21415
21416    #[cfg(feature = "sql-sqlite")]
21417    #[test]
21418    fn adversarial_sql_large_batch_insert() {
21419        // Insert 10K rows in a single write_sql call.
21420        let n = 10_000;
21421        let vals: Vec<Scalar> = (0..n).map(|i| Scalar::Int64(i as i64)).collect();
21422        let df = fp_frame::DataFrame::from_dict(&["x"], vec![("x", vals)]).unwrap();
21423
21424        let conn = make_sql_test_conn();
21425        write_sql(&df, &conn, "big_table", SqlIfExists::Fail).unwrap();
21426        let back = read_sql_table(&conn, "big_table").unwrap();
21427        assert_eq!(back.index().len(), n);
21428        assert_eq!(
21429            back.column("x").unwrap().values()[n - 1],
21430            Scalar::Int64((n - 1) as i64)
21431        );
21432    }
21433
21434    #[cfg(feature = "sql-sqlite")]
21435    #[test]
21436    fn adversarial_sql_column_name_with_spaces_accepted() {
21437        // Column names with spaces are valid in SQL (quoted identifiers).
21438        // Table names are restricted, but column names go through quoting.
21439        let df = fp_frame::DataFrame::from_dict(
21440            &["has space"],
21441            vec![("has space", vec![Scalar::Int64(1)])],
21442        )
21443        .unwrap();
21444
21445        let conn = make_sql_test_conn();
21446        // This should work since column names are quoted.
21447        let result = write_sql(&df, &conn, "test_spaces", SqlIfExists::Fail);
21448        assert!(
21449            result.is_ok(),
21450            "columns with spaces should work: {:?}",
21451            result.err()
21452        );
21453
21454        let back = read_sql_table(&conn, "test_spaces").unwrap();
21455        assert!(back.column("has space").is_some());
21456    }
21457
21458    #[cfg(feature = "sql-sqlite")]
21459    #[test]
21460    fn adversarial_sql_column_name_with_quotes_accepted() {
21461        let col_name = "has\"quote";
21462        let df =
21463            fp_frame::DataFrame::from_dict(&[col_name], vec![(col_name, vec![Scalar::Int64(7)])])
21464                .unwrap();
21465
21466        let conn = make_sql_test_conn();
21467        let result = write_sql(&df, &conn, "test_quotes", SqlIfExists::Fail);
21468        assert!(
21469            result.is_ok(),
21470            "columns with quotes should work: {:?}",
21471            result.err()
21472        );
21473
21474        let back = read_sql_table(&conn, "test_quotes").unwrap();
21475        assert_eq!(back.column(col_name).unwrap().values()[0], Scalar::Int64(7));
21476    }
21477
21478    // ── SqlConnection capability + dialect probes (br-frankenpandas-6dtf) ────
21479
21480    #[cfg(feature = "sql-sqlite")]
21481    #[test]
21482    fn rusqlite_dialect_name_is_sqlite() {
21483        let conn = make_sql_test_conn();
21484        assert_eq!(super::SqlConnection::dialect_name(&conn), "sqlite");
21485    }
21486
21487    #[cfg(feature = "sql-sqlite")]
21488    #[test]
21489    fn rusqlite_supports_returning_is_true() {
21490        // Bundled SQLite is 3.35+, so RETURNING is supported.
21491        let conn = make_sql_test_conn();
21492        assert!(super::SqlConnection::supports_returning(&conn));
21493    }
21494
21495    #[cfg(feature = "sql-sqlite")]
21496    #[test]
21497    fn rusqlite_max_param_count_is_32766() {
21498        let conn = make_sql_test_conn();
21499        assert_eq!(super::SqlConnection::max_param_count(&conn), Some(32766));
21500    }
21501
21502    #[cfg(feature = "sql-sqlite")]
21503    #[test]
21504    fn rusqlite_with_transaction_commits_on_ok() {
21505        let conn = make_sql_test_conn();
21506        super::SqlConnection::execute_batch(&conn, "CREATE TABLE txn_test (x INTEGER)").unwrap();
21507        let result: Result<i64, IoError> = super::SqlConnection::with_transaction(&conn, |c| {
21508            super::SqlConnection::execute_batch(c, "INSERT INTO txn_test VALUES (42)")?;
21509            Ok(42)
21510        });
21511        assert_eq!(result.unwrap(), 42);
21512        // Verify the row committed.
21513        let row_count =
21514            super::SqlConnection::query(&conn, "SELECT COUNT(*) FROM txn_test", &[]).unwrap();
21515        assert_eq!(row_count.rows.len(), 1);
21516        assert_eq!(row_count.rows[0][0], Scalar::Int64(1));
21517    }
21518
21519    #[cfg(feature = "sql-sqlite")]
21520    #[test]
21521    fn rusqlite_with_transaction_rolls_back_on_err() {
21522        let conn = make_sql_test_conn();
21523        super::SqlConnection::execute_batch(&conn, "CREATE TABLE txn_rollback (x INTEGER)")
21524            .unwrap();
21525        let result: Result<(), IoError> = super::SqlConnection::with_transaction(&conn, |c| {
21526            super::SqlConnection::execute_batch(c, "INSERT INTO txn_rollback VALUES (99)")?;
21527            Err(IoError::Sql("simulated failure".to_string()))
21528        });
21529        assert!(result.is_err());
21530        // Row should NOT have committed.
21531        let row_count =
21532            super::SqlConnection::query(&conn, "SELECT COUNT(*) FROM txn_rollback", &[]).unwrap();
21533        assert_eq!(row_count.rows[0][0], Scalar::Int64(0));
21534    }
21535
21536    #[cfg(feature = "sql-sqlite")]
21537    #[test]
21538    fn rusqlite_with_transaction_rolls_back_on_panic() {
21539        let conn = make_sql_test_conn();
21540        super::SqlConnection::execute_batch(&conn, "CREATE TABLE txn_panic (x INTEGER)").unwrap();
21541
21542        let panic_result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
21543            let _: Result<(), IoError> = super::SqlConnection::with_transaction(&conn, |c| {
21544                super::SqlConnection::execute_batch(c, "INSERT INTO txn_panic VALUES (99)")?;
21545                std::panic::resume_unwind(Box::new("simulated transaction panic"));
21546            });
21547        }));
21548        assert!(panic_result.is_err());
21549
21550        let row_count =
21551            super::SqlConnection::query(&conn, "SELECT COUNT(*) FROM txn_panic", &[]).unwrap();
21552        assert_eq!(row_count.rows[0][0], Scalar::Int64(0));
21553
21554        let result: Result<(), IoError> = super::SqlConnection::with_transaction(&conn, |c| {
21555            super::SqlConnection::execute_batch(c, "INSERT INTO txn_panic VALUES (7)")
21556        });
21557        assert!(result.is_ok());
21558        let rows =
21559            super::SqlConnection::query(&conn, "SELECT x FROM txn_panic ORDER BY x", &[]).unwrap();
21560        assert_eq!(rows.rows, vec![vec![Scalar::Int64(7)]]);
21561    }
21562
21563    #[test]
21564    fn default_capability_probes_are_conservative() {
21565        // A test-double SqlConnection that doesn't override defaults
21566        // should report the conservative-default values from the trait.
21567        struct StubSql;
21568        impl super::SqlConnection for StubSql {
21569            fn query(&self, _q: &str, _p: &[Scalar]) -> Result<super::SqlQueryResult, IoError> {
21570                Ok(super::SqlQueryResult {
21571                    columns: vec![],
21572                    rows: vec![],
21573                })
21574            }
21575            fn execute_batch(&self, _sql: &str) -> Result<(), IoError> {
21576                Ok(())
21577            }
21578            fn table_exists(&self, _name: &str) -> Result<bool, IoError> {
21579                Ok(false)
21580            }
21581            fn insert_rows(&self, _sql: &str, _rows: &[Vec<Scalar>]) -> Result<(), IoError> {
21582                Ok(())
21583            }
21584            fn dtype_sql(&self, _dtype: DType) -> &'static str {
21585                "TEXT"
21586            }
21587            fn index_dtype_sql(&self, _index: &Index) -> &'static str {
21588                "TEXT"
21589            }
21590        }
21591
21592        let stub = StubSql;
21593        assert_eq!(super::SqlConnection::dialect_name(&stub), "unknown");
21594        assert!(!super::SqlConnection::supports_returning(&stub));
21595        assert_eq!(super::SqlConnection::max_param_count(&stub), None);
21596        // Default with_transaction passes through (no BEGIN/COMMIT).
21597        let result: Result<i64, IoError> = super::SqlConnection::with_transaction(&stub, |_| Ok(7));
21598        assert_eq!(result.unwrap(), 7);
21599        // Default quote_identifier produces ANSI double-quotes.
21600        assert_eq!(
21601            super::SqlConnection::quote_identifier(&stub, "col").unwrap(),
21602            r#""col""#
21603        );
21604    }
21605
21606    // ── quote_identifier tests (br-frankenpandas-2y7w / fd90.10) ────────
21607
21608    #[cfg(feature = "sql-sqlite")]
21609    #[test]
21610    fn rusqlite_quote_identifier_uses_ansi_double_quotes() {
21611        let conn = make_sql_test_conn();
21612        assert_eq!(
21613            super::SqlConnection::quote_identifier(&conn, "users").unwrap(),
21614            r#""users""#
21615        );
21616    }
21617
21618    #[cfg(feature = "sql-sqlite")]
21619    #[test]
21620    fn rusqlite_quote_identifier_doubles_embedded_quotes() {
21621        let conn = make_sql_test_conn();
21622        // Identifier containing a `"` must be escaped by doubling the quote.
21623        assert_eq!(
21624            super::SqlConnection::quote_identifier(&conn, r#"value"raw"#).unwrap(),
21625            r#""value""raw""#
21626        );
21627    }
21628
21629    #[cfg(feature = "sql-sqlite")]
21630    #[test]
21631    fn rusqlite_quote_identifier_rejects_null_bytes() {
21632        let conn = make_sql_test_conn();
21633        let err = super::SqlConnection::quote_identifier(&conn, "evil\0name").expect_err("nul");
21634        assert!(matches!(err, IoError::Sql(_)));
21635    }
21636
21637    #[test]
21638    fn default_quote_identifier_doubles_embedded_quotes() {
21639        // Verify the default impl on a non-overriding stub matches the
21640        // SQLite behavior (ANSI is the shared default for SQLite + Postgres).
21641        struct StubSql;
21642        impl super::SqlConnection for StubSql {
21643            fn query(&self, _q: &str, _p: &[Scalar]) -> Result<super::SqlQueryResult, IoError> {
21644                Ok(super::SqlQueryResult {
21645                    columns: vec![],
21646                    rows: vec![],
21647                })
21648            }
21649            fn execute_batch(&self, _sql: &str) -> Result<(), IoError> {
21650                Ok(())
21651            }
21652            fn table_exists(&self, _name: &str) -> Result<bool, IoError> {
21653                Ok(false)
21654            }
21655            fn insert_rows(&self, _sql: &str, _rows: &[Vec<Scalar>]) -> Result<(), IoError> {
21656                Ok(())
21657            }
21658            fn dtype_sql(&self, _dtype: DType) -> &'static str {
21659                "TEXT"
21660            }
21661            fn index_dtype_sql(&self, _index: &Index) -> &'static str {
21662                "TEXT"
21663            }
21664        }
21665        let stub = StubSql;
21666        assert_eq!(
21667            super::SqlConnection::quote_identifier(&stub, r#"value"raw"#).unwrap(),
21668            r#""value""raw""#
21669        );
21670        assert!(super::SqlConnection::quote_identifier(&stub, "evil\0").is_err());
21671    }
21672
21673    // ── SqlReadOptions::dtype tests (br-frankenpandas-l9pt / fd90.11) ───
21674
21675    #[cfg(feature = "sql-sqlite")]
21676    #[test]
21677    fn read_sql_dtype_override_int_to_float() {
21678        let conn = make_sql_test_conn();
21679        super::SqlConnection::execute_batch(
21680            &conn,
21681            "CREATE TABLE amounts (amount INTEGER); INSERT INTO amounts VALUES (1), (2), (3);",
21682        )
21683        .unwrap();
21684        let mut dtype_map = BTreeMap::new();
21685        dtype_map.insert("amount".to_owned(), DType::Float64);
21686        let frame = read_sql_with_options(
21687            &conn,
21688            "SELECT amount FROM amounts ORDER BY amount",
21689            &SqlReadOptions {
21690                params: None,
21691                parse_dates: None,
21692                coerce_float: false,
21693                dtype: Some(dtype_map),
21694                schema: None,
21695                columns: None,
21696                index_col: None,
21697            },
21698        )
21699        .expect("read with dtype");
21700        let col = frame.column("amount").expect("amount");
21701        assert_eq!(col.dtype(), DType::Float64);
21702        assert_eq!(col.values()[0], Scalar::Float64(1.0));
21703        assert_eq!(col.values()[2], Scalar::Float64(3.0));
21704    }
21705
21706    #[cfg(feature = "sql-sqlite")]
21707    #[test]
21708    fn read_sql_dtype_override_unsupported_cast_returns_typed_error() {
21709        // This test asserts that when a dtype override cast is unsupported,
21710        // SQL IO surfaces a typed IoError::Sql with diagnostic context, NOT a
21711        // panic and not a silent skip.
21712        let conn = make_sql_test_conn();
21713        super::SqlConnection::execute_batch(
21714            &conn,
21715            "CREATE TABLE labels (id TEXT); INSERT INTO labels VALUES ('yes'), ('no');",
21716        )
21717        .unwrap();
21718        let mut dtype_map = BTreeMap::new();
21719        dtype_map.insert("id".to_owned(), DType::Bool);
21720        let err = read_sql_with_options(
21721            &conn,
21722            "SELECT id FROM labels ORDER BY id",
21723            &SqlReadOptions {
21724                params: None,
21725                parse_dates: None,
21726                coerce_float: false,
21727                dtype: Some(dtype_map),
21728                schema: None,
21729                columns: None,
21730                index_col: None,
21731            },
21732        )
21733        .expect_err("expected dtype override error");
21734        match err {
21735            IoError::Sql(message) => {
21736                assert!(
21737                    message.contains("dtype override on column 'id'"),
21738                    "unexpected error message: {message}"
21739                );
21740                assert!(
21741                    message.contains("Bool"),
21742                    "unexpected error message: {message}"
21743                );
21744            }
21745            other => unreachable!("expected IoError::Sql, got {other:?}"),
21746        }
21747    }
21748
21749    #[cfg(feature = "sql-sqlite")]
21750    #[test]
21751    fn read_sql_dtype_override_missing_column_is_ignored() {
21752        let conn = make_sql_test_conn();
21753        super::SqlConnection::execute_batch(
21754            &conn,
21755            "CREATE TABLE t (x INTEGER); INSERT INTO t VALUES (1);",
21756        )
21757        .unwrap();
21758        let mut dtype_map = BTreeMap::new();
21759        dtype_map.insert("nonexistent".to_owned(), DType::Float64);
21760        let frame = read_sql_with_options(
21761            &conn,
21762            "SELECT x FROM t",
21763            &SqlReadOptions {
21764                params: None,
21765                parse_dates: None,
21766                coerce_float: false,
21767                dtype: Some(dtype_map),
21768                schema: None,
21769                columns: None,
21770                index_col: None,
21771            },
21772        )
21773        .expect("read with dtype-on-missing-col");
21774        let col = frame.column("x").expect("x");
21775        assert_eq!(col.dtype(), DType::Int64);
21776    }
21777
21778    #[cfg(feature = "sql-sqlite")]
21779    #[test]
21780    fn read_sql_dtype_override_preserves_nulls() {
21781        let conn = make_sql_test_conn();
21782        super::SqlConnection::execute_batch(
21783            &conn,
21784            "CREATE TABLE nulls_tbl (v INTEGER); INSERT INTO nulls_tbl VALUES (1), (NULL), (3);",
21785        )
21786        .unwrap();
21787        let mut dtype_map = BTreeMap::new();
21788        dtype_map.insert("v".to_owned(), DType::Float64);
21789        let frame = read_sql_with_options(
21790            &conn,
21791            "SELECT v FROM nulls_tbl ORDER BY rowid",
21792            &SqlReadOptions {
21793                params: None,
21794                parse_dates: None,
21795                coerce_float: false,
21796                dtype: Some(dtype_map),
21797                schema: None,
21798                columns: None,
21799                index_col: None,
21800            },
21801        )
21802        .expect("read with dtype + nulls");
21803        let col = frame.column("v").expect("v");
21804        assert_eq!(col.dtype(), DType::Float64);
21805        assert!(col.values()[1].is_missing());
21806    }
21807
21808    #[cfg(feature = "sql-sqlite")]
21809    #[test]
21810    fn read_sql_dtype_skipped_when_column_in_parse_dates() {
21811        let conn = make_sql_test_conn();
21812        super::SqlConnection::execute_batch(
21813            &conn,
21814            "CREATE TABLE evt (ts TEXT); INSERT INTO evt VALUES ('2024-01-01 00:00:00');",
21815        )
21816        .unwrap();
21817        let mut dtype_map = BTreeMap::new();
21818        dtype_map.insert("ts".to_owned(), DType::Float64);
21819        let frame = read_sql_with_options(
21820            &conn,
21821            "SELECT ts FROM evt",
21822            &SqlReadOptions {
21823                params: None,
21824                parse_dates: Some(vec!["ts".to_owned()]),
21825                coerce_float: false,
21826                dtype: Some(dtype_map),
21827                schema: None,
21828                columns: None,
21829                index_col: None,
21830            },
21831        )
21832        .expect("read with parse_dates priority");
21833        let col = frame.column("ts").expect("ts");
21834        assert_eq!(col.dtype(), DType::Utf8);
21835    }
21836
21837    // ── Schema probes (br-frankenpandas-6dk9 / fd90.13) ─────────────────
21838
21839    #[cfg(feature = "sql-sqlite")]
21840    #[test]
21841    fn rusqlite_does_not_support_schemas_by_default() {
21842        let conn = make_sql_test_conn();
21843        assert!(!super::SqlConnection::supports_schemas(&conn));
21844        assert_eq!(super::SqlConnection::default_schema(&conn), None);
21845    }
21846
21847    #[test]
21848    fn default_schema_probes_are_conservative() {
21849        // A test-double with no schema overrides reports the conservative
21850        // single-namespace defaults (matches SQLite + most embedded
21851        // backends). Production multi-schema backends (PG, MySQL) override.
21852        struct StubSql;
21853        impl super::SqlConnection for StubSql {
21854            fn query(&self, _q: &str, _p: &[Scalar]) -> Result<super::SqlQueryResult, IoError> {
21855                Ok(super::SqlQueryResult {
21856                    columns: vec![],
21857                    rows: vec![],
21858                })
21859            }
21860            fn execute_batch(&self, _sql: &str) -> Result<(), IoError> {
21861                Ok(())
21862            }
21863            fn table_exists(&self, _name: &str) -> Result<bool, IoError> {
21864                Ok(false)
21865            }
21866            fn insert_rows(&self, _sql: &str, _rows: &[Vec<Scalar>]) -> Result<(), IoError> {
21867                Ok(())
21868            }
21869            fn dtype_sql(&self, _dtype: DType) -> &'static str {
21870                "TEXT"
21871            }
21872            fn index_dtype_sql(&self, _index: &Index) -> &'static str {
21873                "TEXT"
21874            }
21875        }
21876        let stub = StubSql;
21877        assert!(!super::SqlConnection::supports_schemas(&stub));
21878        assert_eq!(super::SqlConnection::default_schema(&stub), None);
21879    }
21880
21881    #[test]
21882    fn schema_probe_overrides_take_effect() {
21883        // A multi-schema-style test backend (e.g. simulating PostgreSQL)
21884        // overrides supports_schemas + default_schema. The overrides MUST
21885        // win over the trait defaults.
21886        struct PgLikeSqlConn;
21887        impl super::SqlConnection for PgLikeSqlConn {
21888            fn query(&self, _q: &str, _p: &[Scalar]) -> Result<super::SqlQueryResult, IoError> {
21889                Ok(super::SqlQueryResult {
21890                    columns: vec![],
21891                    rows: vec![],
21892                })
21893            }
21894            fn execute_batch(&self, _sql: &str) -> Result<(), IoError> {
21895                Ok(())
21896            }
21897            fn table_exists(&self, _name: &str) -> Result<bool, IoError> {
21898                Ok(false)
21899            }
21900            fn insert_rows(&self, _sql: &str, _rows: &[Vec<Scalar>]) -> Result<(), IoError> {
21901                Ok(())
21902            }
21903            fn dtype_sql(&self, _dtype: DType) -> &'static str {
21904                "TEXT"
21905            }
21906            fn index_dtype_sql(&self, _index: &Index) -> &'static str {
21907                "TEXT"
21908            }
21909            fn supports_schemas(&self) -> bool {
21910                true
21911            }
21912            fn default_schema(&self) -> Option<String> {
21913                Some("public".to_owned())
21914            }
21915        }
21916        let conn = PgLikeSqlConn;
21917        assert!(super::SqlConnection::supports_schemas(&conn));
21918        assert_eq!(
21919            super::SqlConnection::default_schema(&conn).as_deref(),
21920            Some("public")
21921        );
21922    }
21923
21924    // ── SqlReadOptions::schema tests (br-frankenpandas-u6zn / fd90.14) ──
21925
21926    #[test]
21927    fn sql_select_all_query_no_schema_uses_bare_table() {
21928        struct StubSql;
21929        impl super::SqlConnection for StubSql {
21930            fn query(&self, _q: &str, _p: &[Scalar]) -> Result<super::SqlQueryResult, IoError> {
21931                Ok(super::SqlQueryResult {
21932                    columns: vec![],
21933                    rows: vec![],
21934                })
21935            }
21936            fn execute_batch(&self, _sql: &str) -> Result<(), IoError> {
21937                Ok(())
21938            }
21939            fn table_exists(&self, _name: &str) -> Result<bool, IoError> {
21940                Ok(false)
21941            }
21942            fn insert_rows(&self, _sql: &str, _rows: &[Vec<Scalar>]) -> Result<(), IoError> {
21943                Ok(())
21944            }
21945            fn dtype_sql(&self, _dtype: DType) -> &'static str {
21946                "TEXT"
21947            }
21948            fn index_dtype_sql(&self, _index: &Index) -> &'static str {
21949                "TEXT"
21950            }
21951        }
21952        let conn = StubSql;
21953        let q1 = super::sql_select_all_query_in_schema(&conn, "users", None).expect("q1");
21954        assert_eq!(q1, "SELECT * FROM \"users\"");
21955    }
21956
21957    #[test]
21958    fn sql_select_query_with_schema_rejects_non_schema_backend() {
21959        struct StubSql;
21960        impl super::SqlConnection for StubSql {
21961            fn query(&self, _q: &str, _p: &[Scalar]) -> Result<super::SqlQueryResult, IoError> {
21962                Ok(super::SqlQueryResult {
21963                    columns: vec![],
21964                    rows: vec![],
21965                })
21966            }
21967            fn execute_batch(&self, _sql: &str) -> Result<(), IoError> {
21968                Ok(())
21969            }
21970            fn table_exists(&self, _name: &str) -> Result<bool, IoError> {
21971                Ok(false)
21972            }
21973            fn insert_rows(&self, _sql: &str, _rows: &[Vec<Scalar>]) -> Result<(), IoError> {
21974                Ok(())
21975            }
21976            fn dtype_sql(&self, _dtype: DType) -> &'static str {
21977                "TEXT"
21978            }
21979            fn index_dtype_sql(&self, _index: &Index) -> &'static str {
21980                "TEXT"
21981            }
21982            fn dialect_name(&self) -> &'static str {
21983                "stub"
21984            }
21985        }
21986        let conn = StubSql;
21987        let err = super::sql_select_all_query_in_schema(&conn, "users", Some("analytics"))
21988            .expect_err("schema must reject when backend has no schema support");
21989        assert!(
21990            matches!(err, IoError::Sql(msg) if msg.contains("schema is not supported by stub backend"))
21991        );
21992
21993        let err =
21994            super::sql_select_columns_query_in_schema(&conn, "users", Some("analytics"), &["id"])
21995                .expect_err("projected schema select must reject too");
21996        assert!(
21997            matches!(err, IoError::Sql(msg) if msg.contains("schema is not supported by stub backend"))
21998        );
21999    }
22000
22001    #[test]
22002    fn sql_select_all_query_with_schema_qualifies_on_multi_schema_backend() {
22003        struct PgLikeSchemaSql;
22004        impl super::SqlConnection for PgLikeSchemaSql {
22005            fn query(&self, _q: &str, _p: &[Scalar]) -> Result<super::SqlQueryResult, IoError> {
22006                Ok(super::SqlQueryResult {
22007                    columns: vec![],
22008                    rows: vec![],
22009                })
22010            }
22011            fn execute_batch(&self, _sql: &str) -> Result<(), IoError> {
22012                Ok(())
22013            }
22014            fn table_exists(&self, _name: &str) -> Result<bool, IoError> {
22015                Ok(false)
22016            }
22017            fn insert_rows(&self, _sql: &str, _rows: &[Vec<Scalar>]) -> Result<(), IoError> {
22018                Ok(())
22019            }
22020            fn dtype_sql(&self, _dtype: DType) -> &'static str {
22021                "TEXT"
22022            }
22023            fn index_dtype_sql(&self, _index: &Index) -> &'static str {
22024                "TEXT"
22025            }
22026            fn supports_schemas(&self) -> bool {
22027                true
22028            }
22029        }
22030        let conn = PgLikeSchemaSql;
22031        let q =
22032            super::sql_select_all_query_in_schema(&conn, "users", Some("analytics")).expect("q");
22033        assert_eq!(q, "SELECT * FROM \"analytics\".\"users\"");
22034        let bare = super::sql_select_all_query_in_schema(&conn, "users", None).expect("bare");
22035        assert_eq!(bare, "SELECT * FROM \"users\"");
22036    }
22037
22038    #[cfg(feature = "sql-sqlite")]
22039    #[test]
22040    fn read_sql_table_with_options_schema_rejected_on_sqlite() {
22041        let conn = make_sql_test_conn();
22042        super::SqlConnection::execute_batch(
22043            &conn,
22044            "CREATE TABLE bare_tbl (x INTEGER); INSERT INTO bare_tbl VALUES (1), (2);",
22045        )
22046        .unwrap();
22047        let err = read_sql_table_with_options(
22048            &conn,
22049            "bare_tbl",
22050            &SqlReadOptions {
22051                params: None,
22052                parse_dates: None,
22053                coerce_float: false,
22054                dtype: None,
22055                schema: Some("ignored_on_sqlite".to_owned()),
22056                columns: None,
22057                index_col: None,
22058            },
22059        )
22060        .expect_err("read_sql_table schema=Some must reject on SQLite");
22061        assert!(
22062            matches!(err, IoError::Sql(msg) if msg.contains("schema is not supported by sqlite backend"))
22063        );
22064    }
22065
22066    #[cfg(feature = "sql-sqlite")]
22067    #[test]
22068    fn read_sql_table_chunks_with_options_schema_rejected_on_sqlite() {
22069        let conn = make_sql_test_conn();
22070        super::SqlConnection::execute_batch(
22071            &conn,
22072            "CREATE TABLE chunk_bare_tbl (x INTEGER); INSERT INTO chunk_bare_tbl VALUES (1), (2);",
22073        )
22074        .unwrap();
22075        let err = read_sql_table_chunks_with_options(
22076            &conn,
22077            "chunk_bare_tbl",
22078            &SqlReadOptions {
22079                schema: Some("ignored_on_sqlite".to_owned()),
22080                ..Default::default()
22081            },
22082            1,
22083        )
22084        .expect_err("chunked read_sql_table schema=Some must reject on SQLite");
22085        assert!(
22086            matches!(err, IoError::Sql(msg) if msg.contains("schema is not supported by sqlite backend"))
22087        );
22088    }
22089
22090    #[test]
22091    fn sql_select_all_query_in_schema_validates_schema_name() {
22092        struct PgLikeValidate;
22093        impl super::SqlConnection for PgLikeValidate {
22094            fn query(&self, _q: &str, _p: &[Scalar]) -> Result<super::SqlQueryResult, IoError> {
22095                Ok(super::SqlQueryResult {
22096                    columns: vec![],
22097                    rows: vec![],
22098                })
22099            }
22100            fn execute_batch(&self, _sql: &str) -> Result<(), IoError> {
22101                Ok(())
22102            }
22103            fn table_exists(&self, _name: &str) -> Result<bool, IoError> {
22104                Ok(false)
22105            }
22106            fn insert_rows(&self, _sql: &str, _rows: &[Vec<Scalar>]) -> Result<(), IoError> {
22107                Ok(())
22108            }
22109            fn dtype_sql(&self, _dtype: DType) -> &'static str {
22110                "TEXT"
22111            }
22112            fn index_dtype_sql(&self, _index: &Index) -> &'static str {
22113                "TEXT"
22114            }
22115            fn supports_schemas(&self) -> bool {
22116                true
22117            }
22118        }
22119        let conn = PgLikeValidate;
22120        let err = super::sql_select_all_query_in_schema(&conn, "users", Some("evil; DROP"))
22121            .expect_err("malformed schema must reject");
22122        // Per fd90.56: error message now correctly identifies the
22123        // bad identifier as a schema, not a table.
22124        assert!(matches!(err, IoError::Sql(msg) if msg.contains("invalid schema name")));
22125    }
22126
22127    // ── SqlWriteOptions::schema tests (br-frankenpandas-udn6 / fd90.15) ─
22128
22129    #[test]
22130    fn sql_create_table_query_in_schema_qualifies_on_multi_schema_backend() {
22131        struct PgLikeWrite;
22132        impl super::SqlConnection for PgLikeWrite {
22133            fn query(&self, _q: &str, _p: &[Scalar]) -> Result<super::SqlQueryResult, IoError> {
22134                Ok(super::SqlQueryResult {
22135                    columns: vec![],
22136                    rows: vec![],
22137                })
22138            }
22139            fn execute_batch(&self, _sql: &str) -> Result<(), IoError> {
22140                Ok(())
22141            }
22142            fn table_exists(&self, _name: &str) -> Result<bool, IoError> {
22143                Ok(false)
22144            }
22145            fn insert_rows(&self, _sql: &str, _rows: &[Vec<Scalar>]) -> Result<(), IoError> {
22146                Ok(())
22147            }
22148            fn dtype_sql(&self, _dtype: DType) -> &'static str {
22149                "TEXT"
22150            }
22151            fn index_dtype_sql(&self, _index: &Index) -> &'static str {
22152                "TEXT"
22153            }
22154            fn supports_schemas(&self) -> bool {
22155                true
22156            }
22157        }
22158        let conn = PgLikeWrite;
22159        let cols = vec!["id INTEGER".to_owned(), "name TEXT".to_owned()];
22160        let q = super::sql_create_table_query_in_schema(&conn, "users", Some("analytics"), &cols)
22161            .expect("create");
22162        assert_eq!(
22163            q,
22164            "CREATE TABLE IF NOT EXISTS \"analytics\".\"users\" (id INTEGER, name TEXT)"
22165        );
22166        let bare =
22167            super::sql_create_table_query_in_schema(&conn, "users", None, &cols).expect("bare");
22168        assert_eq!(
22169            bare,
22170            "CREATE TABLE IF NOT EXISTS \"users\" (id INTEGER, name TEXT)"
22171        );
22172    }
22173
22174    #[test]
22175    fn sql_insert_rows_query_in_schema_qualifies_on_multi_schema_backend() {
22176        struct PgLikeInsert;
22177        impl super::SqlConnection for PgLikeInsert {
22178            fn query(&self, _q: &str, _p: &[Scalar]) -> Result<super::SqlQueryResult, IoError> {
22179                Ok(super::SqlQueryResult {
22180                    columns: vec![],
22181                    rows: vec![],
22182                })
22183            }
22184            fn execute_batch(&self, _sql: &str) -> Result<(), IoError> {
22185                Ok(())
22186            }
22187            fn table_exists(&self, _name: &str) -> Result<bool, IoError> {
22188                Ok(false)
22189            }
22190            fn insert_rows(&self, _sql: &str, _rows: &[Vec<Scalar>]) -> Result<(), IoError> {
22191                Ok(())
22192            }
22193            fn dtype_sql(&self, _dtype: DType) -> &'static str {
22194                "TEXT"
22195            }
22196            fn index_dtype_sql(&self, _index: &Index) -> &'static str {
22197                "TEXT"
22198            }
22199            fn supports_schemas(&self) -> bool {
22200                true
22201            }
22202        }
22203        let conn = PgLikeInsert;
22204        let cols = vec!["id".to_owned(), "name".to_owned()];
22205        let q = super::sql_insert_rows_query_in_schema(&conn, "users", Some("analytics"), &cols)
22206            .expect("insert");
22207        assert_eq!(
22208            q,
22209            "INSERT INTO \"analytics\".\"users\" (\"id\", \"name\") VALUES (?, ?)"
22210        );
22211    }
22212
22213    #[cfg(feature = "sql-sqlite")]
22214    #[test]
22215    fn write_sql_with_options_schema_silently_ignored_on_sqlite() {
22216        let conn = make_sql_test_conn();
22217        let frame = fp_frame::DataFrame::from_dict(
22218            &["x"],
22219            vec![("x", vec![Scalar::Int64(1), Scalar::Int64(2)])],
22220        )
22221        .unwrap();
22222        // SQLite reports supports_schemas=false; passing schema=Some(s) must
22223        // not break the write — the bare table reference is used.
22224        write_sql_with_options(
22225            &frame,
22226            &conn,
22227            "bare_write_tbl",
22228            &SqlWriteOptions {
22229                if_exists: SqlIfExists::Fail,
22230                index: false,
22231                index_label: None,
22232                schema: Some("ignored_on_sqlite".to_owned()),
22233                dtype: None,
22234                method: SqlInsertMethod::Single,
22235                chunksize: None,
22236            },
22237        )
22238        .expect("write with schema=Some on SQLite");
22239        let back = read_sql_table(&conn, "bare_write_tbl").expect("read");
22240        let col = back.column("x").expect("x");
22241        assert_eq!(col.values()[0], Scalar::Int64(1));
22242        assert_eq!(col.values()[1], Scalar::Int64(2));
22243    }
22244
22245    #[test]
22246    fn sql_create_table_query_in_schema_validates_schema_name() {
22247        struct PgLikeValidate;
22248        impl super::SqlConnection for PgLikeValidate {
22249            fn query(&self, _q: &str, _p: &[Scalar]) -> Result<super::SqlQueryResult, IoError> {
22250                Ok(super::SqlQueryResult {
22251                    columns: vec![],
22252                    rows: vec![],
22253                })
22254            }
22255            fn execute_batch(&self, _sql: &str) -> Result<(), IoError> {
22256                Ok(())
22257            }
22258            fn table_exists(&self, _name: &str) -> Result<bool, IoError> {
22259                Ok(false)
22260            }
22261            fn insert_rows(&self, _sql: &str, _rows: &[Vec<Scalar>]) -> Result<(), IoError> {
22262                Ok(())
22263            }
22264            fn dtype_sql(&self, _dtype: DType) -> &'static str {
22265                "TEXT"
22266            }
22267            fn index_dtype_sql(&self, _index: &Index) -> &'static str {
22268                "TEXT"
22269            }
22270            fn supports_schemas(&self) -> bool {
22271                true
22272            }
22273        }
22274        let conn = PgLikeValidate;
22275        let cols = vec!["x INTEGER".to_owned()];
22276        let err =
22277            super::sql_create_table_query_in_schema(&conn, "users", Some("evil; DROP"), &cols)
22278                .expect_err("malformed schema must reject");
22279        // Per fd90.56: schema validation error now says invalid
22280        // schema name (not invalid table name).
22281        assert!(matches!(err, IoError::Sql(msg) if msg.contains("invalid schema name")));
22282    }
22283
22284    // ── DROP TABLE schema-qualification (br-frankenpandas-hxob / fd90.16) ─
22285
22286    #[test]
22287    fn sql_drop_table_query_bare_on_non_multi_schema() {
22288        struct StubSql;
22289        impl super::SqlConnection for StubSql {
22290            fn query(&self, _q: &str, _p: &[Scalar]) -> Result<super::SqlQueryResult, IoError> {
22291                Ok(super::SqlQueryResult {
22292                    columns: vec![],
22293                    rows: vec![],
22294                })
22295            }
22296            fn execute_batch(&self, _sql: &str) -> Result<(), IoError> {
22297                Ok(())
22298            }
22299            fn table_exists(&self, _name: &str) -> Result<bool, IoError> {
22300                Ok(false)
22301            }
22302            fn insert_rows(&self, _sql: &str, _rows: &[Vec<Scalar>]) -> Result<(), IoError> {
22303                Ok(())
22304            }
22305            fn dtype_sql(&self, _dtype: DType) -> &'static str {
22306                "TEXT"
22307            }
22308            fn index_dtype_sql(&self, _index: &Index) -> &'static str {
22309                "TEXT"
22310            }
22311        }
22312        let conn = StubSql;
22313        let q = super::sql_drop_table_query_in_schema(&conn, "users", None).expect("drop none");
22314        assert_eq!(q, "DROP TABLE IF EXISTS \"users\"");
22315        // schema=Some on non-multi-schema is silently ignored.
22316        let q2 =
22317            super::sql_drop_table_query_in_schema(&conn, "users", Some("ignored")).expect("drop");
22318        assert_eq!(q2, "DROP TABLE IF EXISTS \"users\"");
22319    }
22320
22321    #[test]
22322    fn sql_drop_table_query_qualifies_on_multi_schema_backend() {
22323        struct PgLikeDrop;
22324        impl super::SqlConnection for PgLikeDrop {
22325            fn query(&self, _q: &str, _p: &[Scalar]) -> Result<super::SqlQueryResult, IoError> {
22326                Ok(super::SqlQueryResult {
22327                    columns: vec![],
22328                    rows: vec![],
22329                })
22330            }
22331            fn execute_batch(&self, _sql: &str) -> Result<(), IoError> {
22332                Ok(())
22333            }
22334            fn table_exists(&self, _name: &str) -> Result<bool, IoError> {
22335                Ok(false)
22336            }
22337            fn insert_rows(&self, _sql: &str, _rows: &[Vec<Scalar>]) -> Result<(), IoError> {
22338                Ok(())
22339            }
22340            fn dtype_sql(&self, _dtype: DType) -> &'static str {
22341                "TEXT"
22342            }
22343            fn index_dtype_sql(&self, _index: &Index) -> &'static str {
22344                "TEXT"
22345            }
22346            fn supports_schemas(&self) -> bool {
22347                true
22348            }
22349        }
22350        let conn = PgLikeDrop;
22351        let q = super::sql_drop_table_query_in_schema(&conn, "users", Some("analytics"))
22352            .expect("drop qualified");
22353        assert_eq!(q, "DROP TABLE IF EXISTS \"analytics\".\"users\"");
22354        let bare = super::sql_drop_table_query_in_schema(&conn, "users", None).expect("drop bare");
22355        assert_eq!(bare, "DROP TABLE IF EXISTS \"users\"");
22356    }
22357
22358    #[cfg(feature = "sql-sqlite")]
22359    #[test]
22360    fn write_sql_replace_with_schema_silently_ignored_on_sqlite() {
22361        // Replace path drops + recreates. SQLite reports supports_schemas
22362        // == false; the schema is silently ignored on the DROP and on the
22363        // CREATE/INSERT — the round trip lands data in the bare table.
22364        let conn = make_sql_test_conn();
22365        super::SqlConnection::execute_batch(
22366            &conn,
22367            "CREATE TABLE replace_tbl (x INTEGER); INSERT INTO replace_tbl VALUES (99);",
22368        )
22369        .unwrap();
22370        let frame = fp_frame::DataFrame::from_dict(
22371            &["x"],
22372            vec![("x", vec![Scalar::Int64(1), Scalar::Int64(2)])],
22373        )
22374        .unwrap();
22375        write_sql_with_options(
22376            &frame,
22377            &conn,
22378            "replace_tbl",
22379            &SqlWriteOptions {
22380                if_exists: SqlIfExists::Replace,
22381                index: false,
22382                index_label: None,
22383                schema: Some("ignored_on_sqlite".to_owned()),
22384                dtype: None,
22385                method: SqlInsertMethod::Single,
22386                chunksize: None,
22387            },
22388        )
22389        .expect("replace + schema=Some on SQLite");
22390        let back = read_sql_table(&conn, "replace_tbl").expect("read");
22391        let col = back.column("x").expect("x");
22392        // Pre-existing 99 was dropped; new rows present.
22393        assert_eq!(col.values().len(), 2);
22394        assert_eq!(col.values()[0], Scalar::Int64(1));
22395        assert_eq!(col.values()[1], Scalar::Int64(2));
22396    }
22397
22398    // ── table_exists_in_schema (br-frankenpandas-70d1 / fd90.17) ────────
22399
22400    #[test]
22401    fn default_table_exists_in_schema_delegates_to_table_exists() {
22402        // A stub that returns table_exists=true for "users" must report the
22403        // same value via the schema-aware default impl regardless of schema.
22404        struct StubExistsTrue;
22405        impl super::SqlConnection for StubExistsTrue {
22406            fn query(&self, _q: &str, _p: &[Scalar]) -> Result<super::SqlQueryResult, IoError> {
22407                Ok(super::SqlQueryResult {
22408                    columns: vec![],
22409                    rows: vec![],
22410                })
22411            }
22412            fn execute_batch(&self, _sql: &str) -> Result<(), IoError> {
22413                Ok(())
22414            }
22415            fn table_exists(&self, name: &str) -> Result<bool, IoError> {
22416                Ok(name == "users")
22417            }
22418            fn insert_rows(&self, _sql: &str, _rows: &[Vec<Scalar>]) -> Result<(), IoError> {
22419                Ok(())
22420            }
22421            fn dtype_sql(&self, _dtype: DType) -> &'static str {
22422                "TEXT"
22423            }
22424            fn index_dtype_sql(&self, _index: &Index) -> &'static str {
22425                "TEXT"
22426            }
22427        }
22428        let conn = StubExistsTrue;
22429        // Schema is ignored by the default impl.
22430        assert!(super::SqlConnection::table_exists_in_schema(&conn, "users", None).unwrap());
22431        assert!(
22432            super::SqlConnection::table_exists_in_schema(&conn, "users", Some("ignored")).unwrap()
22433        );
22434        assert!(!super::SqlConnection::table_exists_in_schema(&conn, "missing", None).unwrap());
22435    }
22436
22437    #[test]
22438    fn multi_schema_override_scopes_table_exists() {
22439        // PgLikeSchemaCheck overrides table_exists_in_schema to scope by
22440        // schema: only ('analytics', 'users') exists.
22441        struct PgLikeSchemaCheck;
22442        impl super::SqlConnection for PgLikeSchemaCheck {
22443            fn query(&self, _q: &str, _p: &[Scalar]) -> Result<super::SqlQueryResult, IoError> {
22444                Ok(super::SqlQueryResult {
22445                    columns: vec![],
22446                    rows: vec![],
22447                })
22448            }
22449            fn execute_batch(&self, _sql: &str) -> Result<(), IoError> {
22450                Ok(())
22451            }
22452            fn table_exists(&self, _name: &str) -> Result<bool, IoError> {
22453                // Bare table_exists isn't queried by the override path.
22454                Ok(false)
22455            }
22456            fn insert_rows(&self, _sql: &str, _rows: &[Vec<Scalar>]) -> Result<(), IoError> {
22457                Ok(())
22458            }
22459            fn dtype_sql(&self, _dtype: DType) -> &'static str {
22460                "TEXT"
22461            }
22462            fn index_dtype_sql(&self, _index: &Index) -> &'static str {
22463                "TEXT"
22464            }
22465            fn supports_schemas(&self) -> bool {
22466                true
22467            }
22468            fn table_exists_in_schema(
22469                &self,
22470                table: &str,
22471                schema: Option<&str>,
22472            ) -> Result<bool, IoError> {
22473                Ok(table == "users" && schema == Some("analytics"))
22474            }
22475        }
22476        let conn = PgLikeSchemaCheck;
22477        assert!(
22478            super::SqlConnection::table_exists_in_schema(&conn, "users", Some("analytics"))
22479                .unwrap()
22480        );
22481        // Different schema → false.
22482        assert!(
22483            !super::SqlConnection::table_exists_in_schema(&conn, "users", Some("audit")).unwrap()
22484        );
22485        // No schema → false (override scopes by Some).
22486        assert!(!super::SqlConnection::table_exists_in_schema(&conn, "users", None).unwrap());
22487    }
22488
22489    #[cfg(feature = "sql-sqlite")]
22490    #[test]
22491    fn write_sql_fail_with_schema_some_still_rejects_existing_on_sqlite() {
22492        // SQLite ignores schema everywhere; the Fail branch still reports
22493        // 'table already exists' when the bare table is present.
22494        let conn = make_sql_test_conn();
22495        super::SqlConnection::execute_batch(&conn, "CREATE TABLE preexists_tbl (x INTEGER);")
22496            .unwrap();
22497        let frame =
22498            fp_frame::DataFrame::from_dict(&["x"], vec![("x", vec![Scalar::Int64(1)])]).unwrap();
22499        let err = write_sql_with_options(
22500            &frame,
22501            &conn,
22502            "preexists_tbl",
22503            &SqlWriteOptions {
22504                if_exists: SqlIfExists::Fail,
22505                index: false,
22506                index_label: None,
22507                schema: Some("ignored_on_sqlite".to_owned()),
22508                dtype: None,
22509                method: SqlInsertMethod::Single,
22510                chunksize: None,
22511            },
22512        )
22513        .expect_err("Fail branch must still reject pre-existing");
22514        assert!(matches!(err, IoError::Sql(msg) if msg.contains("already exists")));
22515    }
22516
22517    // ── SqlWriteOptions::dtype overrides (br-frankenpandas-ev2s / fd90.18) ─
22518
22519    #[cfg(feature = "sql-sqlite")]
22520    #[test]
22521    fn write_sql_dtype_override_emits_custom_sql_type() {
22522        // SQLite is permissive on declared types — the column-type string
22523        // ends up in sqlite_master.sql verbatim, which we can grep to verify
22524        // the override took effect during CREATE TABLE.
22525        let conn = make_sql_test_conn();
22526        let frame = fp_frame::DataFrame::from_dict(
22527            &["amount"],
22528            vec![("amount", vec![Scalar::Int64(100), Scalar::Int64(250)])],
22529        )
22530        .unwrap();
22531        let mut overrides = BTreeMap::new();
22532        overrides.insert("amount".to_owned(), "NUMERIC(10,2)".to_owned());
22533        write_sql_with_options(
22534            &frame,
22535            &conn,
22536            "money_tbl",
22537            &SqlWriteOptions {
22538                if_exists: SqlIfExists::Fail,
22539                index: false,
22540                index_label: None,
22541                schema: None,
22542                dtype: Some(overrides),
22543                method: SqlInsertMethod::Single,
22544                chunksize: None,
22545            },
22546        )
22547        .expect("write with dtype override");
22548        let sm = super::SqlConnection::query(
22549            &conn,
22550            "SELECT sql FROM sqlite_master WHERE name = 'money_tbl'",
22551            &[],
22552        )
22553        .unwrap();
22554        let create_sql = match &sm.rows[0][0] {
22555            Scalar::Utf8(s) => s.clone(),
22556            other => unreachable!("unexpected sqlite_master payload: {other:?}"),
22557        };
22558        assert!(
22559            create_sql.contains("NUMERIC(10,2)"),
22560            "expected override to land in CREATE TABLE; got: {create_sql}"
22561        );
22562    }
22563
22564    #[cfg(feature = "sql-sqlite")]
22565    #[test]
22566    fn write_sql_dtype_override_multiple_columns() {
22567        let conn = make_sql_test_conn();
22568        let frame = fp_frame::DataFrame::from_dict(
22569            &["a", "b"],
22570            vec![
22571                ("a", vec![Scalar::Int64(1)]),
22572                ("b", vec![Scalar::Float64(1.5)]),
22573            ],
22574        )
22575        .unwrap();
22576        let mut overrides = BTreeMap::new();
22577        overrides.insert("a".to_owned(), "BIGINT".to_owned());
22578        overrides.insert("b".to_owned(), "DECIMAL(8,4)".to_owned());
22579        write_sql_with_options(
22580            &frame,
22581            &conn,
22582            "multi_tbl",
22583            &SqlWriteOptions {
22584                if_exists: SqlIfExists::Fail,
22585                index: false,
22586                index_label: None,
22587                schema: None,
22588                dtype: Some(overrides),
22589                method: SqlInsertMethod::Single,
22590                chunksize: None,
22591            },
22592        )
22593        .expect("write with multi-column overrides");
22594        let sm = super::SqlConnection::query(
22595            &conn,
22596            "SELECT sql FROM sqlite_master WHERE name = 'multi_tbl'",
22597            &[],
22598        )
22599        .unwrap();
22600        let create_sql = match &sm.rows[0][0] {
22601            Scalar::Utf8(s) => s.clone(),
22602            other => unreachable!("unexpected sqlite_master payload: {other:?}"),
22603        };
22604        assert!(create_sql.contains("BIGINT"));
22605        assert!(create_sql.contains("DECIMAL(8,4)"));
22606    }
22607
22608    #[cfg(feature = "sql-sqlite")]
22609    #[test]
22610    fn write_sql_dtype_override_for_missing_column_silently_ignored() {
22611        let conn = make_sql_test_conn();
22612        let frame =
22613            fp_frame::DataFrame::from_dict(&["x"], vec![("x", vec![Scalar::Int64(1)])]).unwrap();
22614        let mut overrides = BTreeMap::new();
22615        overrides.insert("nonexistent".to_owned(), "BIGINT".to_owned());
22616        // No error — pandas silently ignores dtype entries for columns not in the frame.
22617        write_sql_with_options(
22618            &frame,
22619            &conn,
22620            "missing_col_tbl",
22621            &SqlWriteOptions {
22622                if_exists: SqlIfExists::Fail,
22623                index: false,
22624                index_label: None,
22625                schema: None,
22626                dtype: Some(overrides),
22627                method: SqlInsertMethod::Single,
22628                chunksize: None,
22629            },
22630        )
22631        .expect("write with override on missing col");
22632        // The actual 'x' column kept its inferred type.
22633        let sm = super::SqlConnection::query(
22634            &conn,
22635            "SELECT sql FROM sqlite_master WHERE name = 'missing_col_tbl'",
22636            &[],
22637        )
22638        .unwrap();
22639        let create_sql = match &sm.rows[0][0] {
22640            Scalar::Utf8(s) => s.clone(),
22641            other => unreachable!("unexpected sqlite_master payload: {other:?}"),
22642        };
22643        assert!(create_sql.contains("INTEGER"));
22644        assert!(!create_sql.contains("BIGINT"));
22645    }
22646
22647    #[cfg(feature = "sql-sqlite")]
22648    #[test]
22649    fn write_sql_dtype_none_falls_back_to_inferred_type() {
22650        let conn = make_sql_test_conn();
22651        let frame =
22652            fp_frame::DataFrame::from_dict(&["x"], vec![("x", vec![Scalar::Int64(1)])]).unwrap();
22653        write_sql_with_options(
22654            &frame,
22655            &conn,
22656            "no_override_tbl",
22657            &SqlWriteOptions {
22658                if_exists: SqlIfExists::Fail,
22659                index: false,
22660                index_label: None,
22661                schema: None,
22662                dtype: None,
22663                method: SqlInsertMethod::Single,
22664                chunksize: None,
22665            },
22666        )
22667        .expect("write without override");
22668        let sm = super::SqlConnection::query(
22669            &conn,
22670            "SELECT sql FROM sqlite_master WHERE name = 'no_override_tbl'",
22671            &[],
22672        )
22673        .unwrap();
22674        let create_sql = match &sm.rows[0][0] {
22675            Scalar::Utf8(s) => s.clone(),
22676            other => unreachable!("unexpected sqlite_master payload: {other:?}"),
22677        };
22678        // INTEGER is conn.dtype_sql(DType::Int64) for rusqlite.
22679        assert!(create_sql.contains("INTEGER"));
22680    }
22681
22682    // ── SqlInsertMethod::Multi (br-frankenpandas-i0ml / fd90.19) ─────────
22683
22684    #[cfg(feature = "sql-sqlite")]
22685    #[test]
22686    fn write_sql_multi_round_trip_matches_single() {
22687        // Same frame written via Single vs Multi must produce identical
22688        // SELECT * results — the only observable difference should be
22689        // the wire-format efficiency, never the stored values.
22690        let frame = fp_frame::DataFrame::from_dict(
22691            &["id", "name", "amount"],
22692            vec![
22693                (
22694                    "id",
22695                    vec![Scalar::Int64(1), Scalar::Int64(2), Scalar::Int64(3)],
22696                ),
22697                (
22698                    "name",
22699                    vec![
22700                        Scalar::Utf8("alice".into()),
22701                        Scalar::Utf8("bob".into()),
22702                        Scalar::Utf8("carol".into()),
22703                    ],
22704                ),
22705                (
22706                    "amount",
22707                    vec![
22708                        Scalar::Float64(1.5),
22709                        Scalar::Float64(2.5),
22710                        Scalar::Float64(3.5),
22711                    ],
22712                ),
22713            ],
22714        )
22715        .unwrap();
22716
22717        let conn_single = make_sql_test_conn();
22718        write_sql_with_options(
22719            &frame,
22720            &conn_single,
22721            "single_tbl",
22722            &SqlWriteOptions {
22723                if_exists: SqlIfExists::Fail,
22724                index: false,
22725                index_label: None,
22726                schema: None,
22727                dtype: None,
22728                method: SqlInsertMethod::Single,
22729                chunksize: None,
22730            },
22731        )
22732        .unwrap();
22733        let single = read_sql(&conn_single, "SELECT * FROM single_tbl ORDER BY id").unwrap();
22734
22735        let conn_multi = make_sql_test_conn();
22736        write_sql_with_options(
22737            &frame,
22738            &conn_multi,
22739            "multi_tbl",
22740            &SqlWriteOptions {
22741                if_exists: SqlIfExists::Fail,
22742                index: false,
22743                index_label: None,
22744                schema: None,
22745                dtype: None,
22746                method: SqlInsertMethod::Multi,
22747                chunksize: None,
22748            },
22749        )
22750        .unwrap();
22751        let multi = read_sql(&conn_multi, "SELECT * FROM multi_tbl ORDER BY id").unwrap();
22752
22753        assert_eq!(single.column_names(), multi.column_names());
22754        for name in single.column_names() {
22755            let s = single.column(name).unwrap().values().to_vec();
22756            let m = multi.column(name).unwrap().values().to_vec();
22757            assert_eq!(s, m, "column {name} diverged between Single and Multi");
22758        }
22759    }
22760
22761    #[test]
22762    fn sql_multi_row_insert_query_emits_correct_placeholder_count() {
22763        // 3 rows × 2 cols = 6 placeholders, ordinals 1..=6.
22764        struct PgLikeStub;
22765        impl super::SqlConnection for PgLikeStub {
22766            fn query(&self, _q: &str, _p: &[Scalar]) -> Result<SqlQueryResult, IoError> {
22767                Ok(SqlQueryResult {
22768                    columns: vec![],
22769                    rows: vec![],
22770                })
22771            }
22772            fn execute_batch(&self, _sql: &str) -> Result<(), IoError> {
22773                Ok(())
22774            }
22775            fn table_exists(&self, _name: &str) -> Result<bool, IoError> {
22776                Ok(false)
22777            }
22778            fn insert_rows(&self, _sql: &str, _rows: &[Vec<Scalar>]) -> Result<(), IoError> {
22779                Ok(())
22780            }
22781            fn dtype_sql(&self, _dtype: DType) -> &'static str {
22782                "TEXT"
22783            }
22784            fn index_dtype_sql(&self, _index: &Index) -> &'static str {
22785                "TEXT"
22786            }
22787            fn parameter_marker(&self, ordinal: usize) -> String {
22788                format!("${ordinal}")
22789            }
22790        }
22791        let conn = PgLikeStub;
22792        let cols = vec!["a".to_owned(), "b".to_owned()];
22793        let sql =
22794            super::sql_multi_row_insert_query_in_schema(&conn, "tbl", None, &cols, 3).unwrap();
22795        // Expect: INSERT INTO "tbl" ("a", "b") VALUES ($1, $2), ($3, $4), ($5, $6)
22796        assert!(
22797            sql.contains("VALUES ($1, $2), ($3, $4), ($5, $6)"),
22798            "got: {sql}"
22799        );
22800    }
22801
22802    #[test]
22803    fn sql_multi_row_insert_query_rejects_zero_rows() {
22804        struct StubConn;
22805        impl super::SqlConnection for StubConn {
22806            fn query(&self, _q: &str, _p: &[Scalar]) -> Result<SqlQueryResult, IoError> {
22807                Ok(SqlQueryResult {
22808                    columns: vec![],
22809                    rows: vec![],
22810                })
22811            }
22812            fn execute_batch(&self, _sql: &str) -> Result<(), IoError> {
22813                Ok(())
22814            }
22815            fn table_exists(&self, _name: &str) -> Result<bool, IoError> {
22816                Ok(false)
22817            }
22818            fn insert_rows(&self, _sql: &str, _rows: &[Vec<Scalar>]) -> Result<(), IoError> {
22819                Ok(())
22820            }
22821            fn dtype_sql(&self, _dtype: DType) -> &'static str {
22822                "TEXT"
22823            }
22824            fn index_dtype_sql(&self, _index: &Index) -> &'static str {
22825                "TEXT"
22826            }
22827        }
22828        let conn = StubConn;
22829        let cols = vec!["a".to_owned()];
22830        let err = super::sql_multi_row_insert_query_in_schema(&conn, "tbl", None, &cols, 0)
22831            .expect_err("zero rows must be rejected");
22832        assert!(matches!(err, IoError::Sql(msg) if msg.contains("at least one row")));
22833    }
22834
22835    #[cfg(feature = "sql-sqlite")]
22836    #[test]
22837    fn write_sql_multi_chunks_at_max_param_boundary() {
22838        // Verify the chunking logic dispatches multiple INSERT statements
22839        // when num_rows * num_cols exceeds max_param_count. We override
22840        // max_param_count on a recording stub to force a tiny budget.
22841        use std::cell::RefCell;
22842        struct ChunkRecorder {
22843            statements: RefCell<Vec<String>>,
22844            row_counts: RefCell<Vec<usize>>,
22845        }
22846        impl super::SqlConnection for ChunkRecorder {
22847            fn query(&self, _q: &str, _p: &[Scalar]) -> Result<SqlQueryResult, IoError> {
22848                Ok(SqlQueryResult {
22849                    columns: vec![],
22850                    rows: vec![],
22851                })
22852            }
22853            fn execute_batch(&self, _sql: &str) -> Result<(), IoError> {
22854                Ok(())
22855            }
22856            fn table_exists(&self, _name: &str) -> Result<bool, IoError> {
22857                Ok(false)
22858            }
22859            fn insert_rows(&self, sql: &str, rows: &[Vec<Scalar>]) -> Result<(), IoError> {
22860                self.statements.borrow_mut().push(sql.to_owned());
22861                self.row_counts
22862                    .borrow_mut()
22863                    .push(rows.first().map_or(0, std::vec::Vec::len));
22864                Ok(())
22865            }
22866            fn dtype_sql(&self, _dtype: DType) -> &'static str {
22867                "TEXT"
22868            }
22869            fn index_dtype_sql(&self, _index: &Index) -> &'static str {
22870                "TEXT"
22871            }
22872            fn max_param_count(&self) -> Option<usize> {
22873                // 4 params total, ncols=2 → 2 rows per chunk.
22874                Some(4)
22875            }
22876        }
22877        let conn = ChunkRecorder {
22878            statements: RefCell::new(vec![]),
22879            row_counts: RefCell::new(vec![]),
22880        };
22881        let frame = fp_frame::DataFrame::from_dict(
22882            &["a", "b"],
22883            vec![
22884                (
22885                    "a",
22886                    vec![
22887                        Scalar::Int64(1),
22888                        Scalar::Int64(2),
22889                        Scalar::Int64(3),
22890                        Scalar::Int64(4),
22891                        Scalar::Int64(5),
22892                    ],
22893                ),
22894                (
22895                    "b",
22896                    vec![
22897                        Scalar::Int64(10),
22898                        Scalar::Int64(20),
22899                        Scalar::Int64(30),
22900                        Scalar::Int64(40),
22901                        Scalar::Int64(50),
22902                    ],
22903                ),
22904            ],
22905        )
22906        .unwrap();
22907        write_sql_with_options(
22908            &frame,
22909            &conn,
22910            "chunked",
22911            &SqlWriteOptions {
22912                if_exists: SqlIfExists::Fail,
22913                index: false,
22914                index_label: None,
22915                schema: None,
22916                dtype: None,
22917                method: SqlInsertMethod::Multi,
22918                chunksize: None,
22919            },
22920        )
22921        .unwrap();
22922        // 5 rows / 2 per chunk = 3 chunks (2, 2, 1).
22923        let stmts = conn.statements.borrow();
22924        let counts = conn.row_counts.borrow();
22925        assert_eq!(stmts.len(), 3, "expected 3 chunked INSERTs");
22926        // Flat row payloads: 2 rows * 2 cols = 4 scalars, 4, then 1 row * 2 = 2.
22927        assert_eq!(counts.as_slice(), &[4, 4, 2]);
22928    }
22929
22930    #[cfg(feature = "sql-sqlite")]
22931    #[test]
22932    fn write_sql_multi_no_max_param_sends_single_statement() {
22933        // When the backend reports max_param_count() == None, the whole
22934        // frame should ship in a single multi-row INSERT.
22935        use std::cell::RefCell;
22936        struct UnboundedStub {
22937            statements: RefCell<Vec<String>>,
22938        }
22939        impl super::SqlConnection for UnboundedStub {
22940            fn query(&self, _q: &str, _p: &[Scalar]) -> Result<SqlQueryResult, IoError> {
22941                Ok(SqlQueryResult {
22942                    columns: vec![],
22943                    rows: vec![],
22944                })
22945            }
22946            fn execute_batch(&self, _sql: &str) -> Result<(), IoError> {
22947                Ok(())
22948            }
22949            fn table_exists(&self, _name: &str) -> Result<bool, IoError> {
22950                Ok(false)
22951            }
22952            fn insert_rows(&self, sql: &str, _rows: &[Vec<Scalar>]) -> Result<(), IoError> {
22953                self.statements.borrow_mut().push(sql.to_owned());
22954                Ok(())
22955            }
22956            fn dtype_sql(&self, _dtype: DType) -> &'static str {
22957                "TEXT"
22958            }
22959            fn index_dtype_sql(&self, _index: &Index) -> &'static str {
22960                "TEXT"
22961            }
22962            fn max_param_count(&self) -> Option<usize> {
22963                None
22964            }
22965        }
22966        let conn = UnboundedStub {
22967            statements: RefCell::new(vec![]),
22968        };
22969        let frame = fp_frame::DataFrame::from_dict(
22970            &["x"],
22971            vec![(
22972                "x",
22973                vec![Scalar::Int64(1), Scalar::Int64(2), Scalar::Int64(3)],
22974            )],
22975        )
22976        .unwrap();
22977        write_sql_with_options(
22978            &frame,
22979            &conn,
22980            "uncapped",
22981            &SqlWriteOptions {
22982                if_exists: SqlIfExists::Fail,
22983                index: false,
22984                index_label: None,
22985                schema: None,
22986                dtype: None,
22987                method: SqlInsertMethod::Multi,
22988                chunksize: None,
22989            },
22990        )
22991        .unwrap();
22992        let stmts = conn.statements.borrow();
22993        assert_eq!(stmts.len(), 1, "expected exactly one multi-row INSERT");
22994        // 3 tuples → 2 commas separating tuples in VALUES (...), (...), (...).
22995        let stmt = &stmts[0];
22996        assert_eq!(
22997            stmt.matches("(?)").count(),
22998            3,
22999            "expected 3 row tuples in: {stmt}"
23000        );
23001    }
23002
23003    #[cfg(feature = "sql-sqlite")]
23004    #[test]
23005    fn write_sql_multi_preserves_nulls() {
23006        // NaT/Null values must round-trip through the multi-row path.
23007        let frame = fp_frame::DataFrame::from_dict(
23008            &["a", "b"],
23009            vec![
23010                (
23011                    "a",
23012                    vec![
23013                        Scalar::Int64(1),
23014                        Scalar::Null(NullKind::Null),
23015                        Scalar::Int64(3),
23016                    ],
23017                ),
23018                (
23019                    "b",
23020                    vec![
23021                        Scalar::Utf8("x".into()),
23022                        Scalar::Utf8("y".into()),
23023                        Scalar::Null(NullKind::Null),
23024                    ],
23025                ),
23026            ],
23027        )
23028        .unwrap();
23029        let conn = make_sql_test_conn();
23030        write_sql_with_options(
23031            &frame,
23032            &conn,
23033            "nulls_tbl",
23034            &SqlWriteOptions {
23035                if_exists: SqlIfExists::Fail,
23036                index: false,
23037                index_label: None,
23038                schema: None,
23039                dtype: None,
23040                method: SqlInsertMethod::Multi,
23041                chunksize: None,
23042            },
23043        )
23044        .unwrap();
23045        let back = read_sql(&conn, "SELECT a, b FROM nulls_tbl ORDER BY rowid").unwrap();
23046        let a = back.column("a").unwrap().values();
23047        let b = back.column("b").unwrap().values();
23048        assert_eq!(a[0], Scalar::Int64(1));
23049        assert!(matches!(a[1], Scalar::Null(_)));
23050        assert_eq!(a[2], Scalar::Int64(3));
23051        assert_eq!(b[0], Scalar::Utf8("x".into()));
23052        assert_eq!(b[1], Scalar::Utf8("y".into()));
23053        assert!(matches!(b[2], Scalar::Null(_)));
23054    }
23055
23056    #[test]
23057    fn sql_insert_method_default_is_single() {
23058        assert_eq!(SqlInsertMethod::default(), SqlInsertMethod::Single);
23059    }
23060
23061    // ── list_sql_tables / SqlConnection::list_tables (br-vhq2 / fd90.20) ─
23062
23063    #[cfg(feature = "sql-sqlite")]
23064    #[test]
23065    fn list_sql_tables_empty_db_returns_empty_vec() {
23066        let conn = make_sql_test_conn();
23067        let tables = list_sql_tables(&conn, None).unwrap();
23068        assert!(tables.is_empty(), "expected no tables; got {tables:?}");
23069    }
23070
23071    #[cfg(feature = "sql-sqlite")]
23072    #[test]
23073    fn list_sql_tables_returns_user_tables_sorted() {
23074        let conn = make_sql_test_conn();
23075        super::SqlConnection::execute_batch(&conn, "CREATE TABLE zebra (x INTEGER);").unwrap();
23076        super::SqlConnection::execute_batch(&conn, "CREATE TABLE alpha (y TEXT);").unwrap();
23077        super::SqlConnection::execute_batch(&conn, "CREATE TABLE mango (z REAL);").unwrap();
23078        let tables = list_sql_tables(&conn, None).unwrap();
23079        assert_eq!(tables, vec!["alpha", "mango", "zebra"]);
23080    }
23081
23082    #[cfg(feature = "sql-sqlite")]
23083    #[test]
23084    fn list_sql_tables_excludes_sqlite_internal_tables() {
23085        let conn = make_sql_test_conn();
23086        // Forcing creation of an internal sqlite_sequence table by using
23087        // AUTOINCREMENT — that table must NOT appear in the result.
23088        super::SqlConnection::execute_batch(
23089            &conn,
23090            "CREATE TABLE seq_demo (id INTEGER PRIMARY KEY AUTOINCREMENT, v TEXT);",
23091        )
23092        .unwrap();
23093        super::SqlConnection::execute_batch(&conn, "INSERT INTO seq_demo (v) VALUES ('one');")
23094            .unwrap();
23095        let tables = list_sql_tables(&conn, None).unwrap();
23096        assert_eq!(tables, vec!["seq_demo"]);
23097        assert!(!tables.iter().any(|name| name.starts_with("sqlite_")));
23098    }
23099
23100    #[cfg(feature = "sql-sqlite")]
23101    #[test]
23102    fn list_sql_tables_keeps_user_tables_with_sqlite_prefix_no_underscore() {
23103        // Per fd90.50: a user table named like 'sqliteX' (no underscore
23104        // between 'sqlite' and the rest) was being incorrectly excluded
23105        // by the buggy `NOT LIKE 'sqlite_%'` filter (where `_` is a
23106        // single-char wildcard). After the ESCAPE '\' fix the
23107        // underscore is treated literally, so only names starting with
23108        // the literal substring 'sqlite_' are excluded.
23109        let conn = make_sql_test_conn();
23110        super::SqlConnection::execute_batch(&conn, "CREATE TABLE sqliteX (x INTEGER);").unwrap();
23111        super::SqlConnection::execute_batch(&conn, "CREATE TABLE sqliteY (y TEXT);").unwrap();
23112        super::SqlConnection::execute_batch(&conn, "CREATE TABLE sqlite1234 (z REAL);").unwrap();
23113        let tables = list_sql_tables(&conn, None).unwrap();
23114        assert_eq!(tables, vec!["sqlite1234", "sqliteX", "sqliteY"]);
23115    }
23116
23117    #[cfg(feature = "sql-sqlite")]
23118    #[test]
23119    fn list_sql_views_keeps_user_views_with_sqlite_prefix_no_underscore() {
23120        // Companion to the list_tables case (fd90.50): same escape fix
23121        // applies to list_views.
23122        let conn = make_sql_test_conn();
23123        super::SqlConnection::execute_batch(&conn, "CREATE TABLE base (x INTEGER);").unwrap();
23124        super::SqlConnection::execute_batch(
23125            &conn,
23126            "CREATE VIEW sqliteX_view AS SELECT x FROM base;",
23127        )
23128        .unwrap();
23129        let views = list_sql_views(&conn, None).unwrap();
23130        // sqliteX_view: 'sqliteX' (no underscore after 'sqlite') so the
23131        // literal-underscore filter accepts it.
23132        assert_eq!(views, vec!["sqliteX_view"]);
23133    }
23134
23135    #[cfg(feature = "sql-sqlite")]
23136    #[test]
23137    fn list_sql_tables_schema_silently_ignored_on_sqlite() {
23138        // SQLite reports supports_schemas() == false. Passing a schema
23139        // arg must NOT error; it is silently dropped and all tables are
23140        // returned (matches the documented option-struct ignore policy).
23141        let conn = make_sql_test_conn();
23142        super::SqlConnection::execute_batch(&conn, "CREATE TABLE only_one (x INTEGER);").unwrap();
23143        let with_schema =
23144            list_sql_tables(&conn, Some("ignored_on_sqlite")).expect("schema arg must not error");
23145        let without_schema = list_sql_tables(&conn, None).unwrap();
23146        assert_eq!(with_schema, without_schema);
23147    }
23148
23149    #[test]
23150    fn list_sql_tables_default_impl_returns_empty() {
23151        // A backend that doesn't override list_tables falls through to
23152        // the trait default — returns empty rather than erroring.
23153        struct NoIntrospection;
23154        impl super::SqlConnection for NoIntrospection {
23155            fn query(&self, _q: &str, _p: &[Scalar]) -> Result<SqlQueryResult, IoError> {
23156                Ok(SqlQueryResult {
23157                    columns: vec![],
23158                    rows: vec![],
23159                })
23160            }
23161            fn execute_batch(&self, _sql: &str) -> Result<(), IoError> {
23162                Ok(())
23163            }
23164            fn table_exists(&self, _name: &str) -> Result<bool, IoError> {
23165                Ok(false)
23166            }
23167            fn insert_rows(&self, _sql: &str, _rows: &[Vec<Scalar>]) -> Result<(), IoError> {
23168                Ok(())
23169            }
23170            fn dtype_sql(&self, _dtype: DType) -> &'static str {
23171                "TEXT"
23172            }
23173            fn index_dtype_sql(&self, _index: &Index) -> &'static str {
23174                "TEXT"
23175            }
23176        }
23177        let conn = NoIntrospection;
23178        let tables = list_sql_tables(&conn, None).unwrap();
23179        assert!(tables.is_empty());
23180        let with_schema = list_sql_tables(&conn, Some("any")).unwrap();
23181        assert!(with_schema.is_empty());
23182    }
23183
23184    #[test]
23185    fn list_sql_tables_routes_schema_to_backend_override() {
23186        // Multi-schema backend stub: returns different tables per schema.
23187        struct MultiSchema;
23188        impl super::SqlConnection for MultiSchema {
23189            fn query(&self, _q: &str, _p: &[Scalar]) -> Result<SqlQueryResult, IoError> {
23190                Ok(SqlQueryResult {
23191                    columns: vec![],
23192                    rows: vec![],
23193                })
23194            }
23195            fn execute_batch(&self, _sql: &str) -> Result<(), IoError> {
23196                Ok(())
23197            }
23198            fn table_exists(&self, _name: &str) -> Result<bool, IoError> {
23199                Ok(false)
23200            }
23201            fn insert_rows(&self, _sql: &str, _rows: &[Vec<Scalar>]) -> Result<(), IoError> {
23202                Ok(())
23203            }
23204            fn dtype_sql(&self, _dtype: DType) -> &'static str {
23205                "TEXT"
23206            }
23207            fn index_dtype_sql(&self, _index: &Index) -> &'static str {
23208                "TEXT"
23209            }
23210            fn supports_schemas(&self) -> bool {
23211                true
23212            }
23213            fn list_tables(&self, schema: Option<&str>) -> Result<Vec<String>, IoError> {
23214                Ok(match schema {
23215                    Some("analytics") => {
23216                        vec!["users".to_owned(), "events".to_owned()]
23217                    }
23218                    Some("audit") => vec!["logs".to_owned()],
23219                    Some(_) => vec![],
23220                    None => vec!["public_table".to_owned()],
23221                })
23222            }
23223        }
23224        let conn = MultiSchema;
23225        assert_eq!(
23226            list_sql_tables(&conn, Some("analytics")).unwrap(),
23227            vec!["users", "events"]
23228        );
23229        assert_eq!(list_sql_tables(&conn, Some("audit")).unwrap(), vec!["logs"]);
23230        assert_eq!(
23231            list_sql_tables(&conn, Some("missing")).unwrap(),
23232            Vec::<String>::new()
23233        );
23234        assert_eq!(list_sql_tables(&conn, None).unwrap(), vec!["public_table"]);
23235    }
23236
23237    // ── sql_table_schema / SqlConnection::table_schema (br-w43q / fd90.21) ─
23238
23239    #[cfg(feature = "sql-sqlite")]
23240    #[test]
23241    fn sql_table_schema_unknown_table_returns_none() {
23242        let conn = make_sql_test_conn();
23243        let result = sql_table_schema(&conn, "no_such_table", None).unwrap();
23244        assert!(result.is_none());
23245    }
23246
23247    #[cfg(feature = "sql-sqlite")]
23248    #[test]
23249    fn sql_table_schema_simple_table() {
23250        let conn = make_sql_test_conn();
23251        super::SqlConnection::execute_batch(&conn, "CREATE TABLE simple (id INTEGER, name TEXT);")
23252            .unwrap();
23253        let schema = sql_table_schema(&conn, "simple", None).unwrap().unwrap();
23254        assert_eq!(schema.table_name, "simple");
23255        assert_eq!(schema.columns.len(), 2);
23256        assert_eq!(schema.columns[0].name, "id");
23257        assert_eq!(schema.columns[0].declared_type.as_deref(), Some("INTEGER"));
23258        assert!(schema.columns[0].nullable);
23259        assert!(schema.columns[0].primary_key_ordinal.is_none());
23260        assert_eq!(schema.columns[1].name, "name");
23261        assert_eq!(schema.columns[1].declared_type.as_deref(), Some("TEXT"));
23262        assert!(schema.columns[1].nullable);
23263    }
23264
23265    #[cfg(feature = "sql-sqlite")]
23266    #[test]
23267    fn sql_table_schema_pk_notnull_default() {
23268        let conn = make_sql_test_conn();
23269        super::SqlConnection::execute_batch(
23270            &conn,
23271            "CREATE TABLE meta ( \
23272                id INTEGER PRIMARY KEY, \
23273                name TEXT NOT NULL, \
23274                status TEXT DEFAULT 'active' \
23275             );",
23276        )
23277        .unwrap();
23278        let schema = sql_table_schema(&conn, "meta", None).unwrap().unwrap();
23279        assert_eq!(schema.columns.len(), 3);
23280
23281        let id = schema.column("id").expect("id col");
23282        assert_eq!(id.primary_key_ordinal, Some(0));
23283        // PRIMARY KEY columns in SQLite (without explicit NOT NULL on
23284        // INTEGER PRIMARY KEY) are reported as nullable=true by
23285        // table_info — we surface that as-is rather than fabricating.
23286        // The point is just that primary_key_ordinal is populated.
23287
23288        let name = schema.column("name").expect("name col");
23289        assert!(!name.nullable);
23290        assert!(name.default_value.is_none());
23291        assert!(name.primary_key_ordinal.is_none());
23292
23293        let status = schema.column("status").expect("status col");
23294        assert!(status.nullable);
23295        assert_eq!(
23296            status.default_value.as_deref(),
23297            Some("'active'"),
23298            "expected SQL literal default text"
23299        );
23300    }
23301
23302    #[cfg(feature = "sql-sqlite")]
23303    #[test]
23304    fn sql_table_schema_schema_silently_ignored_on_sqlite() {
23305        let conn = make_sql_test_conn();
23306        super::SqlConnection::execute_batch(&conn, "CREATE TABLE only_one (x INTEGER);").unwrap();
23307        let with_schema = sql_table_schema(&conn, "only_one", Some("ignored_on_sqlite"))
23308            .expect("schema arg must not error")
23309            .expect("table exists");
23310        let without_schema = sql_table_schema(&conn, "only_one", None).unwrap().unwrap();
23311        assert_eq!(with_schema, without_schema);
23312    }
23313
23314    #[cfg(feature = "sql-sqlite")]
23315    #[test]
23316    fn sql_table_schema_rejects_invalid_table_name() {
23317        // The PRAGMA path can't bind parameters, so we validate the
23318        // identifier first. Reject anything with non-alphanumeric chars.
23319        let conn = make_sql_test_conn();
23320        let err = sql_table_schema(&conn, "x; DROP TABLE users", None).expect_err("must reject");
23321        assert!(matches!(err, IoError::Sql(msg) if msg.contains("invalid")));
23322    }
23323
23324    #[test]
23325    fn sql_table_schema_default_impl_returns_none() {
23326        struct NoIntrospection;
23327        impl super::SqlConnection for NoIntrospection {
23328            fn query(&self, _q: &str, _p: &[Scalar]) -> Result<SqlQueryResult, IoError> {
23329                Ok(SqlQueryResult {
23330                    columns: vec![],
23331                    rows: vec![],
23332                })
23333            }
23334            fn execute_batch(&self, _sql: &str) -> Result<(), IoError> {
23335                Ok(())
23336            }
23337            fn table_exists(&self, _name: &str) -> Result<bool, IoError> {
23338                Ok(false)
23339            }
23340            fn insert_rows(&self, _sql: &str, _rows: &[Vec<Scalar>]) -> Result<(), IoError> {
23341                Ok(())
23342            }
23343            fn dtype_sql(&self, _dtype: DType) -> &'static str {
23344                "TEXT"
23345            }
23346            fn index_dtype_sql(&self, _index: &Index) -> &'static str {
23347                "TEXT"
23348            }
23349        }
23350        let conn = NoIntrospection;
23351        assert!(sql_table_schema(&conn, "anything", None).unwrap().is_none());
23352    }
23353
23354    #[test]
23355    fn sql_table_schema_routes_schema_to_backend_override() {
23356        struct MultiSchema;
23357        impl super::SqlConnection for MultiSchema {
23358            fn query(&self, _q: &str, _p: &[Scalar]) -> Result<SqlQueryResult, IoError> {
23359                Ok(SqlQueryResult {
23360                    columns: vec![],
23361                    rows: vec![],
23362                })
23363            }
23364            fn execute_batch(&self, _sql: &str) -> Result<(), IoError> {
23365                Ok(())
23366            }
23367            fn table_exists(&self, _name: &str) -> Result<bool, IoError> {
23368                Ok(false)
23369            }
23370            fn insert_rows(&self, _sql: &str, _rows: &[Vec<Scalar>]) -> Result<(), IoError> {
23371                Ok(())
23372            }
23373            fn dtype_sql(&self, _dtype: DType) -> &'static str {
23374                "TEXT"
23375            }
23376            fn index_dtype_sql(&self, _index: &Index) -> &'static str {
23377                "TEXT"
23378            }
23379            fn supports_schemas(&self) -> bool {
23380                true
23381            }
23382            fn table_schema(
23383                &self,
23384                table: &str,
23385                schema: Option<&str>,
23386            ) -> Result<Option<SqlTableSchema>, IoError> {
23387                if table == "users" && schema == Some("analytics") {
23388                    Ok(Some(SqlTableSchema {
23389                        table_name: "users".to_owned(),
23390                        columns: vec![SqlColumnSchema {
23391                            name: "id".to_owned(),
23392                            declared_type: Some("BIGINT".to_owned()),
23393                            nullable: false,
23394                            default_value: None,
23395                            primary_key_ordinal: Some(0),
23396                            comment: None,
23397                            autoincrement: false,
23398                        }],
23399                    }))
23400                } else {
23401                    Ok(None)
23402                }
23403            }
23404        }
23405        let conn = MultiSchema;
23406        let analytics_users = sql_table_schema(&conn, "users", Some("analytics"))
23407            .unwrap()
23408            .expect("found");
23409        assert_eq!(
23410            analytics_users.columns[0].declared_type.as_deref(),
23411            Some("BIGINT")
23412        );
23413        assert!(
23414            sql_table_schema(&conn, "users", Some("audit"))
23415                .unwrap()
23416                .is_none()
23417        );
23418        assert!(sql_table_schema(&conn, "users", None).unwrap().is_none());
23419    }
23420
23421    // ── list_sql_schemas / SqlConnection::list_schemas (br-lxhi / fd90.22) ─
23422
23423    #[cfg(feature = "sql-sqlite")]
23424    #[test]
23425    fn list_sql_schemas_returns_empty_on_sqlite() {
23426        // SQLite has no meaningful schema concept; the trait default
23427        // (empty Vec) is the right answer.
23428        let conn = make_sql_test_conn();
23429        let schemas = list_sql_schemas(&conn).unwrap();
23430        assert!(schemas.is_empty(), "expected no schemas; got {schemas:?}");
23431    }
23432
23433    #[test]
23434    fn list_sql_schemas_default_impl_returns_empty() {
23435        struct NoIntrospection;
23436        impl super::SqlConnection for NoIntrospection {
23437            fn query(&self, _q: &str, _p: &[Scalar]) -> Result<SqlQueryResult, IoError> {
23438                Ok(SqlQueryResult {
23439                    columns: vec![],
23440                    rows: vec![],
23441                })
23442            }
23443            fn execute_batch(&self, _sql: &str) -> Result<(), IoError> {
23444                Ok(())
23445            }
23446            fn table_exists(&self, _name: &str) -> Result<bool, IoError> {
23447                Ok(false)
23448            }
23449            fn insert_rows(&self, _sql: &str, _rows: &[Vec<Scalar>]) -> Result<(), IoError> {
23450                Ok(())
23451            }
23452            fn dtype_sql(&self, _dtype: DType) -> &'static str {
23453                "TEXT"
23454            }
23455            fn index_dtype_sql(&self, _index: &Index) -> &'static str {
23456                "TEXT"
23457            }
23458        }
23459        let conn = NoIntrospection;
23460        assert!(list_sql_schemas(&conn).unwrap().is_empty());
23461    }
23462
23463    #[test]
23464    fn list_sql_schemas_routes_to_backend_override() {
23465        // Multi-schema backend stub: returns the schemas the connection's
23466        // role can see, with internal schemas filtered out.
23467        struct MultiSchemaServer;
23468        impl super::SqlConnection for MultiSchemaServer {
23469            fn query(&self, _q: &str, _p: &[Scalar]) -> Result<SqlQueryResult, IoError> {
23470                Ok(SqlQueryResult {
23471                    columns: vec![],
23472                    rows: vec![],
23473                })
23474            }
23475            fn execute_batch(&self, _sql: &str) -> Result<(), IoError> {
23476                Ok(())
23477            }
23478            fn table_exists(&self, _name: &str) -> Result<bool, IoError> {
23479                Ok(false)
23480            }
23481            fn insert_rows(&self, _sql: &str, _rows: &[Vec<Scalar>]) -> Result<(), IoError> {
23482                Ok(())
23483            }
23484            fn dtype_sql(&self, _dtype: DType) -> &'static str {
23485                "TEXT"
23486            }
23487            fn index_dtype_sql(&self, _index: &Index) -> &'static str {
23488                "TEXT"
23489            }
23490            fn supports_schemas(&self) -> bool {
23491                true
23492            }
23493            fn list_schemas(&self) -> Result<Vec<String>, IoError> {
23494                // Filter out information_schema + pg_catalog by default.
23495                Ok(vec![
23496                    "public".to_owned(),
23497                    "analytics".to_owned(),
23498                    "audit".to_owned(),
23499                ])
23500            }
23501        }
23502        let conn = MultiSchemaServer;
23503        let schemas = list_sql_schemas(&conn).unwrap();
23504        assert_eq!(schemas, vec!["public", "analytics", "audit"]);
23505        // Verify the override actually filters internal schemas
23506        // (test contract: stub never returns 'pg_catalog' or
23507        // 'information_schema').
23508        assert!(!schemas.iter().any(|s| s.starts_with("pg_")));
23509        assert!(!schemas.iter().any(|s| s == "information_schema"));
23510    }
23511
23512    #[test]
23513    fn list_sql_schemas_propagates_backend_error() {
23514        // If the backend errors during introspection, the error should
23515        // surface through the wrapper unchanged.
23516        struct BrokenIntrospection;
23517        impl super::SqlConnection for BrokenIntrospection {
23518            fn query(&self, _q: &str, _p: &[Scalar]) -> Result<SqlQueryResult, IoError> {
23519                Ok(SqlQueryResult {
23520                    columns: vec![],
23521                    rows: vec![],
23522                })
23523            }
23524            fn execute_batch(&self, _sql: &str) -> Result<(), IoError> {
23525                Ok(())
23526            }
23527            fn table_exists(&self, _name: &str) -> Result<bool, IoError> {
23528                Ok(false)
23529            }
23530            fn insert_rows(&self, _sql: &str, _rows: &[Vec<Scalar>]) -> Result<(), IoError> {
23531                Ok(())
23532            }
23533            fn dtype_sql(&self, _dtype: DType) -> &'static str {
23534                "TEXT"
23535            }
23536            fn index_dtype_sql(&self, _index: &Index) -> &'static str {
23537                "TEXT"
23538            }
23539            fn list_schemas(&self) -> Result<Vec<String>, IoError> {
23540                Err(IoError::Sql("permission denied for catalog".to_owned()))
23541            }
23542        }
23543        let conn = BrokenIntrospection;
23544        let err = list_sql_schemas(&conn).expect_err("should surface backend error");
23545        assert!(matches!(err, IoError::Sql(msg) if msg.contains("permission denied")));
23546    }
23547
23548    // ── truncate_sql_table / SqlConnection::truncate_table (br-phum / fd90.23) ─
23549
23550    #[cfg(feature = "sql-sqlite")]
23551    #[test]
23552    fn truncate_sql_table_clears_rows_but_preserves_schema() {
23553        let conn = make_sql_test_conn();
23554        super::SqlConnection::execute_batch(&conn, "CREATE TABLE rolling (id INTEGER, val TEXT);")
23555            .unwrap();
23556        super::SqlConnection::execute_batch(
23557            &conn,
23558            "INSERT INTO rolling VALUES (1, 'a'), (2, 'b'), (3, 'c');",
23559        )
23560        .unwrap();
23561        // Sanity: rows present.
23562        let before =
23563            super::SqlConnection::query(&conn, "SELECT COUNT(*) FROM rolling", &[]).unwrap();
23564        assert_eq!(before.rows[0][0], Scalar::Int64(3));
23565
23566        truncate_sql_table(&conn, "rolling", None).unwrap();
23567
23568        // Rows gone, table still there.
23569        let after =
23570            super::SqlConnection::query(&conn, "SELECT COUNT(*) FROM rolling", &[]).unwrap();
23571        assert_eq!(after.rows[0][0], Scalar::Int64(0));
23572        assert!(super::SqlConnection::table_exists(&conn, "rolling").unwrap());
23573    }
23574
23575    #[cfg(feature = "sql-sqlite")]
23576    #[test]
23577    fn truncate_sql_table_schema_silently_ignored_on_sqlite() {
23578        let conn = make_sql_test_conn();
23579        super::SqlConnection::execute_batch(&conn, "CREATE TABLE t (x INTEGER);").unwrap();
23580        super::SqlConnection::execute_batch(&conn, "INSERT INTO t VALUES (1);").unwrap();
23581        truncate_sql_table(&conn, "t", Some("ignored_on_sqlite"))
23582            .expect("schema arg must not error on SQLite");
23583        let count = super::SqlConnection::query(&conn, "SELECT COUNT(*) FROM t", &[]).unwrap();
23584        assert_eq!(count.rows[0][0], Scalar::Int64(0));
23585    }
23586
23587    #[cfg(feature = "sql-sqlite")]
23588    #[test]
23589    fn truncate_sql_table_rejects_invalid_table_name() {
23590        let conn = make_sql_test_conn();
23591        let err = truncate_sql_table(&conn, "x; DROP TABLE users", None)
23592            .expect_err("must reject invalid identifier");
23593        assert!(matches!(err, IoError::Sql(msg) if msg.contains("invalid")));
23594    }
23595
23596    #[test]
23597    fn truncate_sql_table_routes_schema_to_quote_identifier() {
23598        // Multi-schema backend stub records the SQL it receives.
23599        use std::cell::RefCell;
23600        struct PgLikeRecorder {
23601            statements: RefCell<Vec<String>>,
23602        }
23603        impl super::SqlConnection for PgLikeRecorder {
23604            fn query(&self, _q: &str, _p: &[Scalar]) -> Result<SqlQueryResult, IoError> {
23605                Ok(SqlQueryResult {
23606                    columns: vec![],
23607                    rows: vec![],
23608                })
23609            }
23610            fn execute_batch(&self, sql: &str) -> Result<(), IoError> {
23611                self.statements.borrow_mut().push(sql.to_owned());
23612                Ok(())
23613            }
23614            fn table_exists(&self, _name: &str) -> Result<bool, IoError> {
23615                Ok(false)
23616            }
23617            fn insert_rows(&self, _sql: &str, _rows: &[Vec<Scalar>]) -> Result<(), IoError> {
23618                Ok(())
23619            }
23620            fn dtype_sql(&self, _dtype: DType) -> &'static str {
23621                "TEXT"
23622            }
23623            fn index_dtype_sql(&self, _index: &Index) -> &'static str {
23624                "TEXT"
23625            }
23626            fn supports_schemas(&self) -> bool {
23627                true
23628            }
23629        }
23630        let conn = PgLikeRecorder {
23631            statements: RefCell::new(vec![]),
23632        };
23633        truncate_sql_table(&conn, "events", Some("analytics")).unwrap();
23634        let stmts = conn.statements.borrow();
23635        assert_eq!(stmts.len(), 1);
23636        // Default impl uses DELETE FROM ... and quote_identifier on
23637        // both schema + table parts.
23638        assert!(
23639            stmts[0].contains("DELETE FROM \"analytics\".\"events\""),
23640            "expected schema-qualified DELETE; got: {}",
23641            stmts[0]
23642        );
23643    }
23644
23645    #[test]
23646    fn truncate_sql_table_backend_override_uses_truncate_keyword() {
23647        // PG/MySQL impls would override with TRUNCATE TABLE for speed.
23648        // Verify the trait dispatch picks up the override.
23649        use std::cell::RefCell;
23650        struct FastTruncate {
23651            statements: RefCell<Vec<String>>,
23652        }
23653        impl super::SqlConnection for FastTruncate {
23654            fn query(&self, _q: &str, _p: &[Scalar]) -> Result<SqlQueryResult, IoError> {
23655                Ok(SqlQueryResult {
23656                    columns: vec![],
23657                    rows: vec![],
23658                })
23659            }
23660            fn execute_batch(&self, sql: &str) -> Result<(), IoError> {
23661                self.statements.borrow_mut().push(sql.to_owned());
23662                Ok(())
23663            }
23664            fn table_exists(&self, _name: &str) -> Result<bool, IoError> {
23665                Ok(false)
23666            }
23667            fn insert_rows(&self, _sql: &str, _rows: &[Vec<Scalar>]) -> Result<(), IoError> {
23668                Ok(())
23669            }
23670            fn dtype_sql(&self, _dtype: DType) -> &'static str {
23671                "TEXT"
23672            }
23673            fn index_dtype_sql(&self, _index: &Index) -> &'static str {
23674                "TEXT"
23675            }
23676            fn truncate_table(
23677                &self,
23678                table_name: &str,
23679                _schema: Option<&str>,
23680            ) -> Result<(), IoError> {
23681                self.execute_batch(&format!("TRUNCATE TABLE \"{table_name}\""))
23682            }
23683        }
23684        let conn = FastTruncate {
23685            statements: RefCell::new(vec![]),
23686        };
23687        truncate_sql_table(&conn, "events", None).unwrap();
23688        let stmts = conn.statements.borrow();
23689        assert_eq!(stmts.len(), 1);
23690        assert!(stmts[0].starts_with("TRUNCATE TABLE"), "got: {}", stmts[0]);
23691    }
23692
23693    // ── sql_server_version / SqlConnection::server_version (br-e23k / fd90.24) ─
23694
23695    #[cfg(feature = "sql-sqlite")]
23696    #[test]
23697    fn sql_server_version_returns_sqlite_version_string() {
23698        let conn = make_sql_test_conn();
23699        let version = sql_server_version(&conn)
23700            .unwrap()
23701            .expect("SQLite reports version");
23702        // Expect dotted-version format like "3.45.1". The parts must be
23703        // non-empty digits — exact value depends on the bundled SQLite.
23704        let parts: Vec<&str> = version.split('.').collect();
23705        assert!(parts.len() >= 2, "expected dotted version; got: {version}");
23706        for part in &parts {
23707            assert!(
23708                !part.is_empty() && part.chars().all(|c| c.is_ascii_digit()),
23709                "expected numeric version parts; got {version}"
23710            );
23711        }
23712    }
23713
23714    #[cfg(feature = "sql-sqlite")]
23715    #[test]
23716    fn sql_server_version_starts_with_three_for_sqlite_3_x() {
23717        // SQLite has been at major version 3 since 2004; bundled
23718        // rusqlite is current (3.40+), so the major must be "3".
23719        let conn = make_sql_test_conn();
23720        let version = sql_server_version(&conn).unwrap().unwrap();
23721        assert!(
23722            version.starts_with("3."),
23723            "expected SQLite 3.x; got {version}"
23724        );
23725    }
23726
23727    #[test]
23728    fn sql_server_version_default_impl_returns_none() {
23729        struct NoIntrospection;
23730        impl super::SqlConnection for NoIntrospection {
23731            fn query(&self, _q: &str, _p: &[Scalar]) -> Result<SqlQueryResult, IoError> {
23732                Ok(SqlQueryResult {
23733                    columns: vec![],
23734                    rows: vec![],
23735                })
23736            }
23737            fn execute_batch(&self, _sql: &str) -> Result<(), IoError> {
23738                Ok(())
23739            }
23740            fn table_exists(&self, _name: &str) -> Result<bool, IoError> {
23741                Ok(false)
23742            }
23743            fn insert_rows(&self, _sql: &str, _rows: &[Vec<Scalar>]) -> Result<(), IoError> {
23744                Ok(())
23745            }
23746            fn dtype_sql(&self, _dtype: DType) -> &'static str {
23747                "TEXT"
23748            }
23749            fn index_dtype_sql(&self, _index: &Index) -> &'static str {
23750                "TEXT"
23751            }
23752        }
23753        let conn = NoIntrospection;
23754        assert!(sql_server_version(&conn).unwrap().is_none());
23755    }
23756
23757    #[test]
23758    fn sql_server_version_routes_to_backend_override() {
23759        struct PgLikeStub;
23760        impl super::SqlConnection for PgLikeStub {
23761            fn query(&self, _q: &str, _p: &[Scalar]) -> Result<SqlQueryResult, IoError> {
23762                Ok(SqlQueryResult {
23763                    columns: vec![],
23764                    rows: vec![],
23765                })
23766            }
23767            fn execute_batch(&self, _sql: &str) -> Result<(), IoError> {
23768                Ok(())
23769            }
23770            fn table_exists(&self, _name: &str) -> Result<bool, IoError> {
23771                Ok(false)
23772            }
23773            fn insert_rows(&self, _sql: &str, _rows: &[Vec<Scalar>]) -> Result<(), IoError> {
23774                Ok(())
23775            }
23776            fn dtype_sql(&self, _dtype: DType) -> &'static str {
23777                "TEXT"
23778            }
23779            fn index_dtype_sql(&self, _index: &Index) -> &'static str {
23780                "TEXT"
23781            }
23782            fn server_version(&self) -> Result<Option<String>, IoError> {
23783                // Mimic `SHOW server_version` payload.
23784                Ok(Some("16.2".to_owned()))
23785            }
23786        }
23787        let conn = PgLikeStub;
23788        assert_eq!(sql_server_version(&conn).unwrap().as_deref(), Some("16.2"));
23789    }
23790
23791    #[test]
23792    fn sql_server_version_propagates_backend_error() {
23793        struct BrokenIntrospection;
23794        impl super::SqlConnection for BrokenIntrospection {
23795            fn query(&self, _q: &str, _p: &[Scalar]) -> Result<SqlQueryResult, IoError> {
23796                Ok(SqlQueryResult {
23797                    columns: vec![],
23798                    rows: vec![],
23799                })
23800            }
23801            fn execute_batch(&self, _sql: &str) -> Result<(), IoError> {
23802                Ok(())
23803            }
23804            fn table_exists(&self, _name: &str) -> Result<bool, IoError> {
23805                Ok(false)
23806            }
23807            fn insert_rows(&self, _sql: &str, _rows: &[Vec<Scalar>]) -> Result<(), IoError> {
23808                Ok(())
23809            }
23810            fn dtype_sql(&self, _dtype: DType) -> &'static str {
23811                "TEXT"
23812            }
23813            fn index_dtype_sql(&self, _index: &Index) -> &'static str {
23814                "TEXT"
23815            }
23816            fn server_version(&self) -> Result<Option<String>, IoError> {
23817                Err(IoError::Sql("connection lost".to_owned()))
23818            }
23819        }
23820        let conn = BrokenIntrospection;
23821        let err = sql_server_version(&conn).expect_err("should surface backend error");
23822        assert!(matches!(err, IoError::Sql(msg) if msg.contains("connection lost")));
23823    }
23824
23825    // ── sql_primary_key_columns / SqlConnection::primary_key_columns
23826    //    (br-uw3y / fd90.25) ────────────────────────────────────────────────
23827
23828    #[cfg(feature = "sql-sqlite")]
23829    #[test]
23830    fn sql_primary_key_columns_unknown_table_returns_empty() {
23831        let conn = make_sql_test_conn();
23832        let pk = sql_primary_key_columns(&conn, "no_such_table", None).unwrap();
23833        assert!(pk.is_empty());
23834    }
23835
23836    #[cfg(feature = "sql-sqlite")]
23837    #[test]
23838    fn sql_primary_key_columns_table_without_pk_returns_empty() {
23839        let conn = make_sql_test_conn();
23840        super::SqlConnection::execute_batch(&conn, "CREATE TABLE no_pk (a INTEGER, b TEXT);")
23841            .unwrap();
23842        let pk = sql_primary_key_columns(&conn, "no_pk", None).unwrap();
23843        assert!(pk.is_empty());
23844    }
23845
23846    #[cfg(feature = "sql-sqlite")]
23847    #[test]
23848    fn sql_primary_key_columns_single_pk() {
23849        let conn = make_sql_test_conn();
23850        super::SqlConnection::execute_batch(
23851            &conn,
23852            "CREATE TABLE single_pk (id INTEGER PRIMARY KEY, name TEXT);",
23853        )
23854        .unwrap();
23855        let pk = sql_primary_key_columns(&conn, "single_pk", None).unwrap();
23856        assert_eq!(pk, vec!["id"]);
23857    }
23858
23859    #[cfg(feature = "sql-sqlite")]
23860    #[test]
23861    fn sql_primary_key_columns_composite_pk_ordered_by_ordinal() {
23862        let conn = make_sql_test_conn();
23863        super::SqlConnection::execute_batch(
23864            &conn,
23865            "CREATE TABLE composite ( \
23866                year INTEGER NOT NULL, \
23867                month INTEGER NOT NULL, \
23868                code TEXT NOT NULL, \
23869                value REAL, \
23870                PRIMARY KEY (year, month, code) \
23871             );",
23872        )
23873        .unwrap();
23874        let pk = sql_primary_key_columns(&conn, "composite", None).unwrap();
23875        // PK declaration order: year, month, code.
23876        assert_eq!(pk, vec!["year", "month", "code"]);
23877    }
23878
23879    #[test]
23880    fn sql_primary_key_columns_default_impl_returns_empty_when_no_introspection() {
23881        // Backend with no table_schema override returns Ok(None) →
23882        // primary_key_columns falls through to empty Vec.
23883        struct NoIntrospection;
23884        impl super::SqlConnection for NoIntrospection {
23885            fn query(&self, _q: &str, _p: &[Scalar]) -> Result<SqlQueryResult, IoError> {
23886                Ok(SqlQueryResult {
23887                    columns: vec![],
23888                    rows: vec![],
23889                })
23890            }
23891            fn execute_batch(&self, _sql: &str) -> Result<(), IoError> {
23892                Ok(())
23893            }
23894            fn table_exists(&self, _name: &str) -> Result<bool, IoError> {
23895                Ok(false)
23896            }
23897            fn insert_rows(&self, _sql: &str, _rows: &[Vec<Scalar>]) -> Result<(), IoError> {
23898                Ok(())
23899            }
23900            fn dtype_sql(&self, _dtype: DType) -> &'static str {
23901                "TEXT"
23902            }
23903            fn index_dtype_sql(&self, _index: &Index) -> &'static str {
23904                "TEXT"
23905            }
23906        }
23907        let conn = NoIntrospection;
23908        assert!(
23909            sql_primary_key_columns(&conn, "anything", None)
23910                .unwrap()
23911                .is_empty()
23912        );
23913    }
23914
23915    #[test]
23916    fn sql_primary_key_columns_routes_schema_to_table_schema_override() {
23917        // Backend that returns ordinal-sorted PK columns from a
23918        // multi-schema introspection.
23919        struct MultiSchemaPk;
23920        impl super::SqlConnection for MultiSchemaPk {
23921            fn query(&self, _q: &str, _p: &[Scalar]) -> Result<SqlQueryResult, IoError> {
23922                Ok(SqlQueryResult {
23923                    columns: vec![],
23924                    rows: vec![],
23925                })
23926            }
23927            fn execute_batch(&self, _sql: &str) -> Result<(), IoError> {
23928                Ok(())
23929            }
23930            fn table_exists(&self, _name: &str) -> Result<bool, IoError> {
23931                Ok(false)
23932            }
23933            fn insert_rows(&self, _sql: &str, _rows: &[Vec<Scalar>]) -> Result<(), IoError> {
23934                Ok(())
23935            }
23936            fn dtype_sql(&self, _dtype: DType) -> &'static str {
23937                "TEXT"
23938            }
23939            fn index_dtype_sql(&self, _index: &Index) -> &'static str {
23940                "TEXT"
23941            }
23942            fn supports_schemas(&self) -> bool {
23943                true
23944            }
23945            fn table_schema(
23946                &self,
23947                table: &str,
23948                schema: Option<&str>,
23949            ) -> Result<Option<SqlTableSchema>, IoError> {
23950                if table == "events" && schema == Some("analytics") {
23951                    Ok(Some(SqlTableSchema {
23952                        table_name: "events".to_owned(),
23953                        columns: vec![
23954                            // Intentionally out-of-declaration-order to
23955                            // verify the helper sorts by ordinal.
23956                            SqlColumnSchema {
23957                                name: "code".to_owned(),
23958                                declared_type: Some("TEXT".to_owned()),
23959                                nullable: false,
23960                                default_value: None,
23961                                primary_key_ordinal: Some(2),
23962                                comment: None,
23963                                autoincrement: false,
23964                            },
23965                            SqlColumnSchema {
23966                                name: "year".to_owned(),
23967                                declared_type: Some("INTEGER".to_owned()),
23968                                nullable: false,
23969                                default_value: None,
23970                                primary_key_ordinal: Some(0),
23971                                comment: None,
23972                                autoincrement: false,
23973                            },
23974                            SqlColumnSchema {
23975                                name: "value".to_owned(),
23976                                declared_type: Some("REAL".to_owned()),
23977                                nullable: true,
23978                                default_value: None,
23979                                primary_key_ordinal: None,
23980                                comment: None,
23981                                autoincrement: false,
23982                            },
23983                            SqlColumnSchema {
23984                                name: "month".to_owned(),
23985                                declared_type: Some("INTEGER".to_owned()),
23986                                nullable: false,
23987                                default_value: None,
23988                                primary_key_ordinal: Some(1),
23989                                comment: None,
23990                                autoincrement: false,
23991                            },
23992                        ],
23993                    }))
23994                } else {
23995                    Ok(None)
23996                }
23997            }
23998        }
23999        let conn = MultiSchemaPk;
24000        let pk = sql_primary_key_columns(&conn, "events", Some("analytics")).unwrap();
24001        // Sorted by primary_key_ordinal: 0=year, 1=month, 2=code.
24002        assert_eq!(pk, vec!["year", "month", "code"]);
24003        // Wrong schema → empty (table_schema returns None).
24004        assert!(
24005            sql_primary_key_columns(&conn, "events", Some("audit"))
24006                .unwrap()
24007                .is_empty()
24008        );
24009    }
24010
24011    // ── sql_max_identifier_length / SqlConnection::max_identifier_length
24012    //    (br-cs81 / fd90.26) ────────────────────────────────────────────────
24013
24014    #[cfg(feature = "sql-sqlite")]
24015    #[test]
24016    fn sql_max_identifier_length_returns_none_on_sqlite() {
24017        // SQLite has no documented identifier-length limit; the trait
24018        // default (None) is the right answer.
24019        let conn = make_sql_test_conn();
24020        assert_eq!(sql_max_identifier_length(&conn), None);
24021    }
24022
24023    #[test]
24024    fn sql_max_identifier_length_default_impl_returns_none() {
24025        struct Generic;
24026        impl super::SqlConnection for Generic {
24027            fn query(&self, _q: &str, _p: &[Scalar]) -> Result<SqlQueryResult, IoError> {
24028                Ok(SqlQueryResult {
24029                    columns: vec![],
24030                    rows: vec![],
24031                })
24032            }
24033            fn execute_batch(&self, _sql: &str) -> Result<(), IoError> {
24034                Ok(())
24035            }
24036            fn table_exists(&self, _name: &str) -> Result<bool, IoError> {
24037                Ok(false)
24038            }
24039            fn insert_rows(&self, _sql: &str, _rows: &[Vec<Scalar>]) -> Result<(), IoError> {
24040                Ok(())
24041            }
24042            fn dtype_sql(&self, _dtype: DType) -> &'static str {
24043                "TEXT"
24044            }
24045            fn index_dtype_sql(&self, _index: &Index) -> &'static str {
24046                "TEXT"
24047            }
24048        }
24049        assert_eq!(sql_max_identifier_length(&Generic), None);
24050    }
24051
24052    #[test]
24053    fn sql_max_identifier_length_pg_override_reports_63() {
24054        struct PgLikeStub;
24055        impl super::SqlConnection for PgLikeStub {
24056            fn query(&self, _q: &str, _p: &[Scalar]) -> Result<SqlQueryResult, IoError> {
24057                Ok(SqlQueryResult {
24058                    columns: vec![],
24059                    rows: vec![],
24060                })
24061            }
24062            fn execute_batch(&self, _sql: &str) -> Result<(), IoError> {
24063                Ok(())
24064            }
24065            fn table_exists(&self, _name: &str) -> Result<bool, IoError> {
24066                Ok(false)
24067            }
24068            fn insert_rows(&self, _sql: &str, _rows: &[Vec<Scalar>]) -> Result<(), IoError> {
24069                Ok(())
24070            }
24071            fn dtype_sql(&self, _dtype: DType) -> &'static str {
24072                "TEXT"
24073            }
24074            fn index_dtype_sql(&self, _index: &Index) -> &'static str {
24075                "TEXT"
24076            }
24077            fn max_identifier_length(&self) -> Option<usize> {
24078                Some(63)
24079            }
24080        }
24081        assert_eq!(sql_max_identifier_length(&PgLikeStub), Some(63));
24082    }
24083
24084    #[test]
24085    fn sql_max_identifier_length_mysql_override_reports_64() {
24086        struct MySqlLikeStub;
24087        impl super::SqlConnection for MySqlLikeStub {
24088            fn query(&self, _q: &str, _p: &[Scalar]) -> Result<SqlQueryResult, IoError> {
24089                Ok(SqlQueryResult {
24090                    columns: vec![],
24091                    rows: vec![],
24092                })
24093            }
24094            fn execute_batch(&self, _sql: &str) -> Result<(), IoError> {
24095                Ok(())
24096            }
24097            fn table_exists(&self, _name: &str) -> Result<bool, IoError> {
24098                Ok(false)
24099            }
24100            fn insert_rows(&self, _sql: &str, _rows: &[Vec<Scalar>]) -> Result<(), IoError> {
24101                Ok(())
24102            }
24103            fn dtype_sql(&self, _dtype: DType) -> &'static str {
24104                "TEXT"
24105            }
24106            fn index_dtype_sql(&self, _index: &Index) -> &'static str {
24107                "TEXT"
24108            }
24109            fn max_identifier_length(&self) -> Option<usize> {
24110                Some(64)
24111            }
24112        }
24113        assert_eq!(sql_max_identifier_length(&MySqlLikeStub), Some(64));
24114    }
24115
24116    #[test]
24117    fn sql_max_identifier_length_mssql_override_reports_128() {
24118        struct MsSqlLikeStub;
24119        impl super::SqlConnection for MsSqlLikeStub {
24120            fn query(&self, _q: &str, _p: &[Scalar]) -> Result<SqlQueryResult, IoError> {
24121                Ok(SqlQueryResult {
24122                    columns: vec![],
24123                    rows: vec![],
24124                })
24125            }
24126            fn execute_batch(&self, _sql: &str) -> Result<(), IoError> {
24127                Ok(())
24128            }
24129            fn table_exists(&self, _name: &str) -> Result<bool, IoError> {
24130                Ok(false)
24131            }
24132            fn insert_rows(&self, _sql: &str, _rows: &[Vec<Scalar>]) -> Result<(), IoError> {
24133                Ok(())
24134            }
24135            fn dtype_sql(&self, _dtype: DType) -> &'static str {
24136                "TEXT"
24137            }
24138            fn index_dtype_sql(&self, _index: &Index) -> &'static str {
24139                "TEXT"
24140            }
24141            fn max_identifier_length(&self) -> Option<usize> {
24142                Some(128)
24143            }
24144        }
24145        assert_eq!(sql_max_identifier_length(&MsSqlLikeStub), Some(128));
24146    }
24147
24148    // ── sql backend capability probes / SqlInspector caps
24149    //    (frankenpandas-fd90.10) ───────────────────────────────────────────
24150
24151    #[cfg(feature = "sql-sqlite")]
24152    #[test]
24153    fn sql_backend_caps_sqlite_reports_param_and_row_caps() {
24154        let conn = make_sql_test_conn();
24155        let caps = sql_backend_caps(&conn).unwrap();
24156
24157        assert_eq!(caps.dialect_name, "sqlite");
24158        assert!(
24159            caps.server_version
24160                .as_deref()
24161                .is_some_and(|v| v.starts_with("3."))
24162        );
24163        assert!(caps.supports_returning);
24164        assert!(!caps.supports_schemas);
24165        assert_eq!(caps.max_param_count, Some(32766));
24166        assert_eq!(caps.max_identifier_length, None);
24167        assert_eq!(caps.max_insert_rows(3), Some(10922));
24168        assert_eq!(caps.max_insert_rows(0), None);
24169        assert_eq!(sql_max_param_count(&conn), Some(32766));
24170        assert_eq!(sql_max_insert_rows(&conn, 4), Some(8191));
24171        assert!(sql_supports_returning(&conn));
24172        assert!(!sql_supports_schemas(&conn));
24173    }
24174
24175    #[test]
24176    fn sql_inspector_backend_caps_pg_like_stub_reports_limits() {
24177        struct PgLikeCaps;
24178        impl super::SqlConnection for PgLikeCaps {
24179            fn query(&self, _q: &str, _p: &[Scalar]) -> Result<SqlQueryResult, IoError> {
24180                Ok(SqlQueryResult {
24181                    columns: vec![],
24182                    rows: vec![],
24183                })
24184            }
24185            fn execute_batch(&self, _sql: &str) -> Result<(), IoError> {
24186                Ok(())
24187            }
24188            fn table_exists(&self, _name: &str) -> Result<bool, IoError> {
24189                Ok(false)
24190            }
24191            fn insert_rows(&self, _sql: &str, _rows: &[Vec<Scalar>]) -> Result<(), IoError> {
24192                Ok(())
24193            }
24194            fn dtype_sql(&self, _dtype: DType) -> &'static str {
24195                "TEXT"
24196            }
24197            fn index_dtype_sql(&self, _index: &Index) -> &'static str {
24198                "TEXT"
24199            }
24200            fn dialect_name(&self) -> &'static str {
24201                "postgresql"
24202            }
24203            fn server_version(&self) -> Result<Option<String>, IoError> {
24204                Ok(Some("16.3".to_owned()))
24205            }
24206            fn supports_returning(&self) -> bool {
24207                true
24208            }
24209            fn supports_schemas(&self) -> bool {
24210                true
24211            }
24212            fn max_param_count(&self) -> Option<usize> {
24213                Some(65535)
24214            }
24215            fn max_identifier_length(&self) -> Option<usize> {
24216                Some(63)
24217            }
24218        }
24219
24220        let conn = PgLikeCaps;
24221        let inspector = SqlInspector::new(&conn);
24222        let caps = inspector.backend_caps().unwrap();
24223
24224        assert_eq!(inspector.dialect_name(), "postgresql");
24225        assert_eq!(inspector.server_version().unwrap().as_deref(), Some("16.3"));
24226        assert!(inspector.supports_returning());
24227        assert!(inspector.supports_schemas());
24228        assert_eq!(inspector.max_param_count(), Some(65535));
24229        assert_eq!(inspector.max_identifier_length(), Some(63));
24230        assert_eq!(inspector.max_insert_rows(4), Some(16383));
24231        assert_eq!(caps.max_insert_rows(4), Some(16383));
24232        assert_eq!(
24233            caps,
24234            SqlBackendCaps {
24235                dialect_name: "postgresql",
24236                server_version: Some("16.3".to_owned()),
24237                supports_returning: true,
24238                supports_schemas: true,
24239                max_param_count: Some(65535),
24240                max_identifier_length: Some(63),
24241            }
24242        );
24243    }
24244
24245    // ── write_sql identifier-length validation (br-9ynk / fd90.27) ────────
24246
24247    #[cfg(feature = "sql-sqlite")]
24248    #[test]
24249    fn write_sql_long_column_name_succeeds_on_sqlite() {
24250        // SQLite reports max_identifier_length() == None → no validation.
24251        let conn = make_sql_test_conn();
24252        // 80 chars > PG/MySQL caps but fine on SQLite.
24253        let long_col: String = std::iter::repeat_n('a', 80).collect();
24254        let frame = fp_frame::DataFrame::from_dict(
24255            &[long_col.as_str()],
24256            vec![(long_col.as_str(), vec![Scalar::Int64(1)])],
24257        )
24258        .unwrap();
24259        write_sql_with_options(
24260            &frame,
24261            &conn,
24262            "long_col_tbl",
24263            &SqlWriteOptions {
24264                if_exists: SqlIfExists::Fail,
24265                index: false,
24266                index_label: None,
24267                schema: None,
24268                dtype: None,
24269                method: SqlInsertMethod::Single,
24270                chunksize: None,
24271            },
24272        )
24273        .expect("SQLite has no identifier limit");
24274    }
24275
24276    fn make_pg_like_recorder() -> impl super::SqlConnection + 'static {
24277        // Stub PG-like backend: enforces 63-char limit, accepts all
24278        // execute_batch / insert_rows so write_sql can reach the
24279        // identifier-length check before failing on emit.
24280        struct PgLikeLimit;
24281        impl super::SqlConnection for PgLikeLimit {
24282            fn query(&self, _q: &str, _p: &[Scalar]) -> Result<SqlQueryResult, IoError> {
24283                Ok(SqlQueryResult {
24284                    columns: vec![],
24285                    rows: vec![],
24286                })
24287            }
24288            fn execute_batch(&self, _sql: &str) -> Result<(), IoError> {
24289                Ok(())
24290            }
24291            fn table_exists(&self, _name: &str) -> Result<bool, IoError> {
24292                Ok(false)
24293            }
24294            fn insert_rows(&self, _sql: &str, _rows: &[Vec<Scalar>]) -> Result<(), IoError> {
24295                Ok(())
24296            }
24297            fn dtype_sql(&self, _dtype: DType) -> &'static str {
24298                "TEXT"
24299            }
24300            fn index_dtype_sql(&self, _index: &Index) -> &'static str {
24301                "TEXT"
24302            }
24303            fn max_identifier_length(&self) -> Option<usize> {
24304                Some(63)
24305            }
24306            fn supports_schemas(&self) -> bool {
24307                true
24308            }
24309        }
24310        PgLikeLimit
24311    }
24312
24313    #[test]
24314    fn write_sql_rejects_long_column_name_on_pg_like_backend() {
24315        let conn = make_pg_like_recorder();
24316        let long_col: String = std::iter::repeat_n('c', 64).collect();
24317        let frame = fp_frame::DataFrame::from_dict(
24318            &[long_col.as_str()],
24319            vec![(long_col.as_str(), vec![Scalar::Int64(1)])],
24320        )
24321        .unwrap();
24322        let err = write_sql_with_options(
24323            &frame,
24324            &conn,
24325            "ok_tbl",
24326            &SqlWriteOptions {
24327                if_exists: SqlIfExists::Fail,
24328                index: false,
24329                index_label: None,
24330                schema: None,
24331                dtype: None,
24332                method: SqlInsertMethod::Single,
24333                chunksize: None,
24334            },
24335        )
24336        .expect_err("64-char column must exceed PG limit");
24337        assert!(matches!(err, IoError::Sql(msg) if msg.contains("column") && msg.contains("63")));
24338    }
24339
24340    #[test]
24341    fn write_sql_rejects_long_table_name_on_pg_like_backend() {
24342        let conn = make_pg_like_recorder();
24343        // 64-char identifier (table names also subject to the PG cap).
24344        // Use only alphanumeric so validate_sql_table_name passes first.
24345        let long_tbl: String = std::iter::repeat_n('t', 64).collect();
24346        let frame =
24347            fp_frame::DataFrame::from_dict(&["x"], vec![("x", vec![Scalar::Int64(1)])]).unwrap();
24348        let err = write_sql_with_options(
24349            &frame,
24350            &conn,
24351            &long_tbl,
24352            &SqlWriteOptions {
24353                if_exists: SqlIfExists::Fail,
24354                index: false,
24355                index_label: None,
24356                schema: None,
24357                dtype: None,
24358                method: SqlInsertMethod::Single,
24359                chunksize: None,
24360            },
24361        )
24362        .expect_err("64-char table must exceed PG limit");
24363        assert!(matches!(err, IoError::Sql(msg) if msg.contains("table") && msg.contains("63")));
24364    }
24365
24366    #[test]
24367    fn write_sql_rejects_long_index_label_on_pg_like_backend() {
24368        let conn = make_pg_like_recorder();
24369        let long_label: String = std::iter::repeat_n('i', 64).collect();
24370        let frame =
24371            fp_frame::DataFrame::from_dict(&["x"], vec![("x", vec![Scalar::Int64(1)])]).unwrap();
24372        let err = write_sql_with_options(
24373            &frame,
24374            &conn,
24375            "ok_tbl",
24376            &SqlWriteOptions {
24377                if_exists: SqlIfExists::Fail,
24378                index: true,
24379                index_label: Some(long_label),
24380                schema: None,
24381                dtype: None,
24382                method: SqlInsertMethod::Single,
24383                chunksize: None,
24384            },
24385        )
24386        .expect_err("64-char index label must exceed PG limit");
24387        assert!(
24388            matches!(err, IoError::Sql(msg) if msg.contains("index label") && msg.contains("63"))
24389        );
24390    }
24391
24392    #[test]
24393    fn write_sql_rejects_long_schema_name_on_pg_like_backend() {
24394        let conn = make_pg_like_recorder();
24395        let long_schema: String = std::iter::repeat_n('s', 64).collect();
24396        let frame =
24397            fp_frame::DataFrame::from_dict(&["x"], vec![("x", vec![Scalar::Int64(1)])]).unwrap();
24398        let err = write_sql_with_options(
24399            &frame,
24400            &conn,
24401            "ok_tbl",
24402            &SqlWriteOptions {
24403                if_exists: SqlIfExists::Fail,
24404                index: false,
24405                index_label: None,
24406                schema: Some(long_schema),
24407                dtype: None,
24408                method: SqlInsertMethod::Single,
24409                chunksize: None,
24410            },
24411        )
24412        .expect_err("64-char schema must exceed PG limit");
24413        assert!(matches!(err, IoError::Sql(msg) if msg.contains("schema") && msg.contains("63")));
24414    }
24415
24416    #[test]
24417    fn write_sql_just_at_the_boundary_is_accepted() {
24418        let conn = make_pg_like_recorder();
24419        // Exactly 63 chars: at the PG limit, must be accepted.
24420        let just_fits: String = std::iter::repeat_n('a', 63).collect();
24421        let frame = fp_frame::DataFrame::from_dict(
24422            &[just_fits.as_str()],
24423            vec![(just_fits.as_str(), vec![Scalar::Int64(1)])],
24424        )
24425        .unwrap();
24426        write_sql_with_options(
24427            &frame,
24428            &conn,
24429            "ok_tbl",
24430            &SqlWriteOptions {
24431                if_exists: SqlIfExists::Fail,
24432                index: false,
24433                index_label: None,
24434                schema: None,
24435                dtype: None,
24436                method: SqlInsertMethod::Single,
24437                chunksize: None,
24438            },
24439        )
24440        .expect("63-char column at boundary should be accepted");
24441    }
24442
24443    // ── list_sql_indexes / SqlConnection::list_indexes (br-bgv9 / fd90.28) ─
24444
24445    #[cfg(feature = "sql-sqlite")]
24446    #[test]
24447    fn list_sql_indexes_unknown_table_returns_empty() {
24448        let conn = make_sql_test_conn();
24449        let indexes = list_sql_indexes(&conn, "no_such_tbl", None).unwrap();
24450        assert!(indexes.is_empty());
24451    }
24452
24453    #[cfg(feature = "sql-sqlite")]
24454    #[test]
24455    fn list_sql_indexes_table_without_indexes() {
24456        let conn = make_sql_test_conn();
24457        super::SqlConnection::execute_batch(&conn, "CREATE TABLE plain (a INTEGER, b TEXT);")
24458            .unwrap();
24459        let indexes = list_sql_indexes(&conn, "plain", None).unwrap();
24460        assert!(indexes.is_empty());
24461    }
24462
24463    #[cfg(feature = "sql-sqlite")]
24464    #[test]
24465    fn list_sql_indexes_single_column() {
24466        let conn = make_sql_test_conn();
24467        super::SqlConnection::execute_batch(&conn, "CREATE TABLE events (id INTEGER, ts TEXT);")
24468            .unwrap();
24469        super::SqlConnection::execute_batch(&conn, "CREATE INDEX idx_events_ts ON events (ts);")
24470            .unwrap();
24471        let indexes = list_sql_indexes(&conn, "events", None).unwrap();
24472        assert_eq!(indexes.len(), 1);
24473        assert_eq!(indexes[0].name, "idx_events_ts");
24474        assert_eq!(indexes[0].columns, vec!["ts"]);
24475        assert!(!indexes[0].unique);
24476    }
24477
24478    #[cfg(feature = "sql-sqlite")]
24479    #[test]
24480    fn list_sql_indexes_unique_index() {
24481        let conn = make_sql_test_conn();
24482        super::SqlConnection::execute_batch(&conn, "CREATE TABLE users (id INTEGER, email TEXT);")
24483            .unwrap();
24484        super::SqlConnection::execute_batch(
24485            &conn,
24486            "CREATE UNIQUE INDEX idx_users_email ON users (email);",
24487        )
24488        .unwrap();
24489        let indexes = list_sql_indexes(&conn, "users", None).unwrap();
24490        assert_eq!(indexes.len(), 1);
24491        assert_eq!(indexes[0].name, "idx_users_email");
24492        assert_eq!(indexes[0].columns, vec!["email"]);
24493        assert!(indexes[0].unique);
24494    }
24495
24496    #[cfg(feature = "sql-sqlite")]
24497    #[test]
24498    fn list_sql_indexes_composite_columns_in_definition_order() {
24499        let conn = make_sql_test_conn();
24500        super::SqlConnection::execute_batch(
24501            &conn,
24502            "CREATE TABLE rolling (year INT, month INT, code TEXT, val REAL);",
24503        )
24504        .unwrap();
24505        super::SqlConnection::execute_batch(
24506            &conn,
24507            "CREATE INDEX idx_rolling_y_m_c ON rolling (year, month, code);",
24508        )
24509        .unwrap();
24510        let indexes = list_sql_indexes(&conn, "rolling", None).unwrap();
24511        assert_eq!(indexes.len(), 1);
24512        assert_eq!(indexes[0].columns, vec!["year", "month", "code"]);
24513    }
24514
24515    #[cfg(feature = "sql-sqlite")]
24516    #[test]
24517    fn list_sql_indexes_filters_pk_auto_index() {
24518        // INTEGER PRIMARY KEY in SQLite creates an automatic index that
24519        // SQLAlchemy.Inspector hides. We must hide it too — only the
24520        // explicit CREATE INDEX should surface.
24521        let conn = make_sql_test_conn();
24522        super::SqlConnection::execute_batch(
24523            &conn,
24524            "CREATE TABLE pk_only (id INTEGER PRIMARY KEY, name TEXT);",
24525        )
24526        .unwrap();
24527        super::SqlConnection::execute_batch(
24528            &conn,
24529            "CREATE INDEX idx_pk_only_name ON pk_only (name);",
24530        )
24531        .unwrap();
24532        let indexes = list_sql_indexes(&conn, "pk_only", None).unwrap();
24533        // Only the explicit user index should appear.
24534        assert_eq!(indexes.len(), 1);
24535        assert_eq!(indexes[0].name, "idx_pk_only_name");
24536    }
24537
24538    #[cfg(feature = "sql-sqlite")]
24539    #[test]
24540    fn list_sql_indexes_rejects_invalid_table_name() {
24541        let conn = make_sql_test_conn();
24542        let err = list_sql_indexes(&conn, "x; DROP TABLE users", None)
24543            .expect_err("must reject invalid identifier");
24544        assert!(matches!(err, IoError::Sql(msg) if msg.contains("invalid")));
24545    }
24546
24547    #[test]
24548    fn list_sql_indexes_default_impl_returns_empty() {
24549        struct NoIntrospection;
24550        impl super::SqlConnection for NoIntrospection {
24551            fn query(&self, _q: &str, _p: &[Scalar]) -> Result<SqlQueryResult, IoError> {
24552                Ok(SqlQueryResult {
24553                    columns: vec![],
24554                    rows: vec![],
24555                })
24556            }
24557            fn execute_batch(&self, _sql: &str) -> Result<(), IoError> {
24558                Ok(())
24559            }
24560            fn table_exists(&self, _name: &str) -> Result<bool, IoError> {
24561                Ok(false)
24562            }
24563            fn insert_rows(&self, _sql: &str, _rows: &[Vec<Scalar>]) -> Result<(), IoError> {
24564                Ok(())
24565            }
24566            fn dtype_sql(&self, _dtype: DType) -> &'static str {
24567                "TEXT"
24568            }
24569            fn index_dtype_sql(&self, _index: &Index) -> &'static str {
24570                "TEXT"
24571            }
24572        }
24573        assert!(
24574            list_sql_indexes(&NoIntrospection, "anything", None)
24575                .unwrap()
24576                .is_empty()
24577        );
24578    }
24579
24580    #[test]
24581    fn list_sql_indexes_routes_to_backend_override() {
24582        struct MultiSchemaIdx;
24583        impl super::SqlConnection for MultiSchemaIdx {
24584            fn query(&self, _q: &str, _p: &[Scalar]) -> Result<SqlQueryResult, IoError> {
24585                Ok(SqlQueryResult {
24586                    columns: vec![],
24587                    rows: vec![],
24588                })
24589            }
24590            fn execute_batch(&self, _sql: &str) -> Result<(), IoError> {
24591                Ok(())
24592            }
24593            fn table_exists(&self, _name: &str) -> Result<bool, IoError> {
24594                Ok(false)
24595            }
24596            fn insert_rows(&self, _sql: &str, _rows: &[Vec<Scalar>]) -> Result<(), IoError> {
24597                Ok(())
24598            }
24599            fn dtype_sql(&self, _dtype: DType) -> &'static str {
24600                "TEXT"
24601            }
24602            fn index_dtype_sql(&self, _index: &Index) -> &'static str {
24603                "TEXT"
24604            }
24605            fn supports_schemas(&self) -> bool {
24606                true
24607            }
24608            fn list_indexes(
24609                &self,
24610                table: &str,
24611                schema: Option<&str>,
24612            ) -> Result<Vec<SqlIndexSchema>, IoError> {
24613                if table == "events" && schema == Some("analytics") {
24614                    Ok(vec![
24615                        SqlIndexSchema {
24616                            name: "idx_events_ts".to_owned(),
24617                            columns: vec!["ts".to_owned()],
24618                            unique: false,
24619                        },
24620                        SqlIndexSchema {
24621                            name: "uq_events_uid".to_owned(),
24622                            columns: vec!["user_id".to_owned()],
24623                            unique: true,
24624                        },
24625                    ])
24626                } else {
24627                    Ok(vec![])
24628                }
24629            }
24630        }
24631        let conn = MultiSchemaIdx;
24632        let indexes = list_sql_indexes(&conn, "events", Some("analytics")).unwrap();
24633        assert_eq!(indexes.len(), 2);
24634        assert!(
24635            indexes
24636                .iter()
24637                .any(|i| i.unique && i.name == "uq_events_uid")
24638        );
24639        // Wrong schema → empty (override scopes by Some).
24640        assert!(
24641            list_sql_indexes(&conn, "events", Some("audit"))
24642                .unwrap()
24643                .is_empty()
24644        );
24645    }
24646
24647    // ── list_sql_foreign_keys / SqlConnection::list_foreign_keys
24648    //    (br-uht8 / fd90.29) ────────────────────────────────────────────────
24649
24650    #[cfg(feature = "sql-sqlite")]
24651    #[test]
24652    fn list_sql_foreign_keys_unknown_table_returns_empty() {
24653        let conn = make_sql_test_conn();
24654        let fks = list_sql_foreign_keys(&conn, "no_such_tbl", None).unwrap();
24655        assert!(fks.is_empty());
24656    }
24657
24658    #[cfg(feature = "sql-sqlite")]
24659    #[test]
24660    fn list_sql_foreign_keys_table_without_fk() {
24661        let conn = make_sql_test_conn();
24662        super::SqlConnection::execute_batch(&conn, "CREATE TABLE plain (a INTEGER, b TEXT);")
24663            .unwrap();
24664        let fks = list_sql_foreign_keys(&conn, "plain", None).unwrap();
24665        assert!(fks.is_empty());
24666    }
24667
24668    #[cfg(feature = "sql-sqlite")]
24669    #[test]
24670    fn list_sql_foreign_keys_single_column_fk() {
24671        let conn = make_sql_test_conn();
24672        super::SqlConnection::execute_batch(
24673            &conn,
24674            "CREATE TABLE parent (id INTEGER PRIMARY KEY, label TEXT);",
24675        )
24676        .unwrap();
24677        super::SqlConnection::execute_batch(
24678            &conn,
24679            "CREATE TABLE child (cid INTEGER, parent_id INTEGER, \
24680             FOREIGN KEY (parent_id) REFERENCES parent(id));",
24681        )
24682        .unwrap();
24683        let fks = list_sql_foreign_keys(&conn, "child", None).unwrap();
24684        assert_eq!(fks.len(), 1);
24685        assert_eq!(fks[0].columns, vec!["parent_id"]);
24686        assert_eq!(fks[0].referenced_table, "parent");
24687        assert_eq!(fks[0].referenced_columns, vec!["id"]);
24688        // SQLite PRAGMA does not surface constraint names.
24689        assert!(fks[0].constraint_name.is_none());
24690    }
24691
24692    #[cfg(feature = "sql-sqlite")]
24693    #[test]
24694    fn list_sql_foreign_keys_composite_fk_ordered_by_seq() {
24695        let conn = make_sql_test_conn();
24696        super::SqlConnection::execute_batch(
24697            &conn,
24698            "CREATE TABLE rolling ( \
24699                year INTEGER NOT NULL, \
24700                month INTEGER NOT NULL, \
24701                code TEXT NOT NULL, \
24702                PRIMARY KEY (year, month, code) \
24703             );",
24704        )
24705        .unwrap();
24706        super::SqlConnection::execute_batch(
24707            &conn,
24708            "CREATE TABLE rolling_fact ( \
24709                fact_id INTEGER, year INTEGER, month INTEGER, code TEXT, \
24710                FOREIGN KEY (year, month, code) \
24711                  REFERENCES rolling(year, month, code) \
24712             );",
24713        )
24714        .unwrap();
24715        let fks = list_sql_foreign_keys(&conn, "rolling_fact", None).unwrap();
24716        assert_eq!(fks.len(), 1);
24717        // Pairs preserved in declaration order (seq=0,1,2).
24718        assert_eq!(fks[0].columns, vec!["year", "month", "code"]);
24719        assert_eq!(fks[0].referenced_columns, vec!["year", "month", "code"]);
24720        assert_eq!(fks[0].referenced_table, "rolling");
24721    }
24722
24723    #[cfg(feature = "sql-sqlite")]
24724    #[test]
24725    fn list_sql_foreign_keys_multiple_fks_on_one_table() {
24726        let conn = make_sql_test_conn();
24727        super::SqlConnection::execute_batch(&conn, "CREATE TABLE users (id INTEGER PRIMARY KEY);")
24728            .unwrap();
24729        super::SqlConnection::execute_batch(&conn, "CREATE TABLE products (sku TEXT PRIMARY KEY);")
24730            .unwrap();
24731        super::SqlConnection::execute_batch(
24732            &conn,
24733            "CREATE TABLE orders ( \
24734                oid INTEGER, \
24735                user_id INTEGER, \
24736                product_sku TEXT, \
24737                FOREIGN KEY (user_id) REFERENCES users(id), \
24738                FOREIGN KEY (product_sku) REFERENCES products(sku) \
24739             );",
24740        )
24741        .unwrap();
24742        let fks = list_sql_foreign_keys(&conn, "orders", None).unwrap();
24743        assert_eq!(fks.len(), 2);
24744        let user_fk = fks.iter().find(|f| f.referenced_table == "users").unwrap();
24745        assert_eq!(user_fk.columns, vec!["user_id"]);
24746        assert_eq!(user_fk.referenced_columns, vec!["id"]);
24747        let prod_fk = fks
24748            .iter()
24749            .find(|f| f.referenced_table == "products")
24750            .unwrap();
24751        assert_eq!(prod_fk.columns, vec!["product_sku"]);
24752        assert_eq!(prod_fk.referenced_columns, vec!["sku"]);
24753    }
24754
24755    #[cfg(feature = "sql-sqlite")]
24756    #[test]
24757    fn list_sql_foreign_keys_rejects_invalid_table_name() {
24758        let conn = make_sql_test_conn();
24759        let err = list_sql_foreign_keys(&conn, "x; DROP TABLE users", None)
24760            .expect_err("must reject invalid identifier");
24761        assert!(matches!(err, IoError::Sql(msg) if msg.contains("invalid")));
24762    }
24763
24764    #[cfg(feature = "sql-sqlite")]
24765    #[test]
24766    fn list_sql_foreign_keys_resolves_implicit_pk_single_column() {
24767        // Per fd90.44: FOREIGN KEY (parent_id) REFERENCES parent
24768        // (no column list) is an implicit reference to parent's PK.
24769        // SQLite returns NULL for the 'to' column; we now resolve via
24770        // the parent's primary_key_columns.
24771        let conn = make_sql_test_conn();
24772        super::SqlConnection::execute_batch(
24773            &conn,
24774            "CREATE TABLE imp_parent (pid INTEGER PRIMARY KEY, label TEXT);",
24775        )
24776        .unwrap();
24777        super::SqlConnection::execute_batch(
24778            &conn,
24779            "CREATE TABLE imp_child ( \
24780                cid INTEGER, \
24781                parent_id INTEGER, \
24782                FOREIGN KEY (parent_id) REFERENCES imp_parent \
24783             );",
24784        )
24785        .unwrap();
24786        let fks = list_sql_foreign_keys(&conn, "imp_child", None).unwrap();
24787        assert_eq!(
24788            fks.len(),
24789            1,
24790            "implicit-PK FK must surface (was being silently dropped)"
24791        );
24792        assert_eq!(fks[0].columns, vec!["parent_id"]);
24793        assert_eq!(fks[0].referenced_table, "imp_parent");
24794        // resolved from parent's PK.
24795        assert_eq!(fks[0].referenced_columns, vec!["pid"]);
24796    }
24797
24798    #[cfg(feature = "sql-sqlite")]
24799    #[test]
24800    fn list_sql_foreign_keys_resolves_implicit_pk_composite() {
24801        // Composite FK with implicit reference to composite PK.
24802        let conn = make_sql_test_conn();
24803        super::SqlConnection::execute_batch(
24804            &conn,
24805            "CREATE TABLE imp_parent_comp ( \
24806                year INTEGER NOT NULL, \
24807                month INTEGER NOT NULL, \
24808                PRIMARY KEY (year, month) \
24809             );",
24810        )
24811        .unwrap();
24812        super::SqlConnection::execute_batch(
24813            &conn,
24814            "CREATE TABLE imp_child_comp ( \
24815                cid INTEGER, \
24816                fyear INTEGER NOT NULL, \
24817                fmonth INTEGER NOT NULL, \
24818                FOREIGN KEY (fyear, fmonth) REFERENCES imp_parent_comp \
24819             );",
24820        )
24821        .unwrap();
24822        let fks = list_sql_foreign_keys(&conn, "imp_child_comp", None).unwrap();
24823        assert_eq!(fks.len(), 1);
24824        assert_eq!(fks[0].columns, vec!["fyear", "fmonth"]);
24825        assert_eq!(fks[0].referenced_table, "imp_parent_comp");
24826        // Resolved from composite PK in declaration order.
24827        assert_eq!(fks[0].referenced_columns, vec!["year", "month"]);
24828    }
24829
24830    #[cfg(feature = "sql-sqlite")]
24831    #[test]
24832    fn list_sql_foreign_keys_explicit_columns_unchanged() {
24833        // Existing behavior preserved: explicit columns still
24834        // round-trip exactly as before fd90.44.
24835        let conn = make_sql_test_conn();
24836        super::SqlConnection::execute_batch(
24837            &conn,
24838            "CREATE TABLE exp_parent (pid INTEGER PRIMARY KEY);",
24839        )
24840        .unwrap();
24841        super::SqlConnection::execute_batch(
24842            &conn,
24843            "CREATE TABLE exp_child ( \
24844                cid INTEGER, \
24845                parent_id INTEGER, \
24846                FOREIGN KEY (parent_id) REFERENCES exp_parent(pid) \
24847             );",
24848        )
24849        .unwrap();
24850        let fks = list_sql_foreign_keys(&conn, "exp_child", None).unwrap();
24851        assert_eq!(fks.len(), 1);
24852        assert_eq!(fks[0].columns, vec!["parent_id"]);
24853        assert_eq!(fks[0].referenced_columns, vec!["pid"]);
24854    }
24855
24856    #[test]
24857    fn list_sql_foreign_keys_default_impl_returns_empty() {
24858        struct NoIntrospection;
24859        impl super::SqlConnection for NoIntrospection {
24860            fn query(&self, _q: &str, _p: &[Scalar]) -> Result<SqlQueryResult, IoError> {
24861                Ok(SqlQueryResult {
24862                    columns: vec![],
24863                    rows: vec![],
24864                })
24865            }
24866            fn execute_batch(&self, _sql: &str) -> Result<(), IoError> {
24867                Ok(())
24868            }
24869            fn table_exists(&self, _name: &str) -> Result<bool, IoError> {
24870                Ok(false)
24871            }
24872            fn insert_rows(&self, _sql: &str, _rows: &[Vec<Scalar>]) -> Result<(), IoError> {
24873                Ok(())
24874            }
24875            fn dtype_sql(&self, _dtype: DType) -> &'static str {
24876                "TEXT"
24877            }
24878            fn index_dtype_sql(&self, _index: &Index) -> &'static str {
24879                "TEXT"
24880            }
24881        }
24882        assert!(
24883            list_sql_foreign_keys(&NoIntrospection, "anything", None)
24884                .unwrap()
24885                .is_empty()
24886        );
24887    }
24888
24889    #[test]
24890    fn list_sql_foreign_keys_routes_to_backend_override() {
24891        struct MultiSchemaFk;
24892        impl super::SqlConnection for MultiSchemaFk {
24893            fn query(&self, _q: &str, _p: &[Scalar]) -> Result<SqlQueryResult, IoError> {
24894                Ok(SqlQueryResult {
24895                    columns: vec![],
24896                    rows: vec![],
24897                })
24898            }
24899            fn execute_batch(&self, _sql: &str) -> Result<(), IoError> {
24900                Ok(())
24901            }
24902            fn table_exists(&self, _name: &str) -> Result<bool, IoError> {
24903                Ok(false)
24904            }
24905            fn insert_rows(&self, _sql: &str, _rows: &[Vec<Scalar>]) -> Result<(), IoError> {
24906                Ok(())
24907            }
24908            fn dtype_sql(&self, _dtype: DType) -> &'static str {
24909                "TEXT"
24910            }
24911            fn index_dtype_sql(&self, _index: &Index) -> &'static str {
24912                "TEXT"
24913            }
24914            fn supports_schemas(&self) -> bool {
24915                true
24916            }
24917            fn list_foreign_keys(
24918                &self,
24919                table: &str,
24920                schema: Option<&str>,
24921            ) -> Result<Vec<SqlForeignKeySchema>, IoError> {
24922                if table == "orders" && schema == Some("sales") {
24923                    Ok(vec![SqlForeignKeySchema {
24924                        constraint_name: Some("orders_user_fk".to_owned()),
24925                        columns: vec!["user_id".to_owned()],
24926                        referenced_table: "users".to_owned(),
24927                        referenced_columns: vec!["id".to_owned()],
24928                    }])
24929                } else {
24930                    Ok(vec![])
24931                }
24932            }
24933        }
24934        let conn = MultiSchemaFk;
24935        let fks = list_sql_foreign_keys(&conn, "orders", Some("sales")).unwrap();
24936        assert_eq!(fks.len(), 1);
24937        assert_eq!(fks[0].constraint_name.as_deref(), Some("orders_user_fk"));
24938        assert_eq!(fks[0].referenced_table, "users");
24939        // Wrong schema → empty (override scopes by Some).
24940        assert!(
24941            list_sql_foreign_keys(&conn, "orders", Some("audit"))
24942                .unwrap()
24943                .is_empty()
24944        );
24945    }
24946
24947    // ── list_sql_views / SqlConnection::list_views (br-gm3r / fd90.30) ────
24948
24949    #[cfg(feature = "sql-sqlite")]
24950    #[test]
24951    fn list_sql_views_empty_db_returns_empty() {
24952        let conn = make_sql_test_conn();
24953        let views = list_sql_views(&conn, None).unwrap();
24954        assert!(views.is_empty());
24955    }
24956
24957    #[cfg(feature = "sql-sqlite")]
24958    #[test]
24959    fn list_sql_views_returns_user_views_sorted() {
24960        let conn = make_sql_test_conn();
24961        super::SqlConnection::execute_batch(&conn, "CREATE TABLE base (id INTEGER, val TEXT);")
24962            .unwrap();
24963        super::SqlConnection::execute_batch(
24964            &conn,
24965            "CREATE VIEW zebra_view AS SELECT id FROM base;",
24966        )
24967        .unwrap();
24968        super::SqlConnection::execute_batch(
24969            &conn,
24970            "CREATE VIEW alpha_view AS SELECT val FROM base;",
24971        )
24972        .unwrap();
24973        let views = list_sql_views(&conn, None).unwrap();
24974        assert_eq!(views, vec!["alpha_view", "zebra_view"]);
24975    }
24976
24977    #[cfg(feature = "sql-sqlite")]
24978    #[test]
24979    fn list_sql_views_separated_from_list_tables() {
24980        // list_views must NOT surface tables; list_tables must NOT surface
24981        // views. The two buckets are disjoint per SQLAlchemy.Inspector.
24982        let conn = make_sql_test_conn();
24983        super::SqlConnection::execute_batch(&conn, "CREATE TABLE just_tbl (x INTEGER);").unwrap();
24984        super::SqlConnection::execute_batch(
24985            &conn,
24986            "CREATE VIEW just_view AS SELECT x FROM just_tbl;",
24987        )
24988        .unwrap();
24989
24990        let tables = list_sql_tables(&conn, None).unwrap();
24991        let views = list_sql_views(&conn, None).unwrap();
24992        assert_eq!(tables, vec!["just_tbl"]);
24993        assert_eq!(views, vec!["just_view"]);
24994        assert!(!tables.contains(&"just_view".to_owned()));
24995        assert!(!views.contains(&"just_tbl".to_owned()));
24996    }
24997
24998    #[cfg(feature = "sql-sqlite")]
24999    #[test]
25000    fn list_sql_views_schema_silently_ignored_on_sqlite() {
25001        let conn = make_sql_test_conn();
25002        super::SqlConnection::execute_batch(&conn, "CREATE TABLE t (x INTEGER);").unwrap();
25003        super::SqlConnection::execute_batch(&conn, "CREATE VIEW v AS SELECT x FROM t;").unwrap();
25004        let with_schema =
25005            list_sql_views(&conn, Some("ignored_on_sqlite")).expect("schema arg must not error");
25006        let without_schema = list_sql_views(&conn, None).unwrap();
25007        assert_eq!(with_schema, without_schema);
25008    }
25009
25010    #[test]
25011    fn list_sql_views_default_impl_returns_empty() {
25012        struct NoIntrospection;
25013        impl super::SqlConnection for NoIntrospection {
25014            fn query(&self, _q: &str, _p: &[Scalar]) -> Result<SqlQueryResult, IoError> {
25015                Ok(SqlQueryResult {
25016                    columns: vec![],
25017                    rows: vec![],
25018                })
25019            }
25020            fn execute_batch(&self, _sql: &str) -> Result<(), IoError> {
25021                Ok(())
25022            }
25023            fn table_exists(&self, _name: &str) -> Result<bool, IoError> {
25024                Ok(false)
25025            }
25026            fn insert_rows(&self, _sql: &str, _rows: &[Vec<Scalar>]) -> Result<(), IoError> {
25027                Ok(())
25028            }
25029            fn dtype_sql(&self, _dtype: DType) -> &'static str {
25030                "TEXT"
25031            }
25032            fn index_dtype_sql(&self, _index: &Index) -> &'static str {
25033                "TEXT"
25034            }
25035        }
25036        let conn = NoIntrospection;
25037        assert!(list_sql_views(&conn, None).unwrap().is_empty());
25038        assert!(list_sql_views(&conn, Some("any")).unwrap().is_empty());
25039    }
25040
25041    #[test]
25042    fn list_sql_views_routes_schema_to_backend_override() {
25043        struct MultiSchemaViews;
25044        impl super::SqlConnection for MultiSchemaViews {
25045            fn query(&self, _q: &str, _p: &[Scalar]) -> Result<SqlQueryResult, IoError> {
25046                Ok(SqlQueryResult {
25047                    columns: vec![],
25048                    rows: vec![],
25049                })
25050            }
25051            fn execute_batch(&self, _sql: &str) -> Result<(), IoError> {
25052                Ok(())
25053            }
25054            fn table_exists(&self, _name: &str) -> Result<bool, IoError> {
25055                Ok(false)
25056            }
25057            fn insert_rows(&self, _sql: &str, _rows: &[Vec<Scalar>]) -> Result<(), IoError> {
25058                Ok(())
25059            }
25060            fn dtype_sql(&self, _dtype: DType) -> &'static str {
25061                "TEXT"
25062            }
25063            fn index_dtype_sql(&self, _index: &Index) -> &'static str {
25064                "TEXT"
25065            }
25066            fn supports_schemas(&self) -> bool {
25067                true
25068            }
25069            fn list_views(&self, schema: Option<&str>) -> Result<Vec<String>, IoError> {
25070                Ok(match schema {
25071                    Some("reporting") => vec!["daily".to_owned(), "weekly".to_owned()],
25072                    Some("audit") => vec!["log_view".to_owned()],
25073                    _ => vec![],
25074                })
25075            }
25076        }
25077        let conn = MultiSchemaViews;
25078        assert_eq!(
25079            list_sql_views(&conn, Some("reporting")).unwrap(),
25080            vec!["daily", "weekly"]
25081        );
25082        assert_eq!(
25083            list_sql_views(&conn, Some("audit")).unwrap(),
25084            vec!["log_view"]
25085        );
25086        assert!(list_sql_views(&conn, None).unwrap().is_empty());
25087    }
25088
25089    // ── list_sql_unique_constraints / SqlConnection::list_unique_constraints
25090    //    (br-sh4v / fd90.31) ────────────────────────────────────────────────
25091
25092    #[cfg(feature = "sql-sqlite")]
25093    #[test]
25094    fn list_sql_unique_constraints_unknown_table_returns_empty() {
25095        let conn = make_sql_test_conn();
25096        let uqs = list_sql_unique_constraints(&conn, "no_such", None).unwrap();
25097        assert!(uqs.is_empty());
25098    }
25099
25100    #[cfg(feature = "sql-sqlite")]
25101    #[test]
25102    fn list_sql_unique_constraints_table_without_uq() {
25103        let conn = make_sql_test_conn();
25104        super::SqlConnection::execute_batch(&conn, "CREATE TABLE plain (a INTEGER, b TEXT);")
25105            .unwrap();
25106        let uqs = list_sql_unique_constraints(&conn, "plain", None).unwrap();
25107        assert!(uqs.is_empty());
25108    }
25109
25110    #[cfg(feature = "sql-sqlite")]
25111    #[test]
25112    fn list_sql_unique_constraints_inline_unique() {
25113        let conn = make_sql_test_conn();
25114        super::SqlConnection::execute_batch(
25115            &conn,
25116            "CREATE TABLE users (id INTEGER PRIMARY KEY, email TEXT UNIQUE);",
25117        )
25118        .unwrap();
25119        let uqs = list_sql_unique_constraints(&conn, "users", None).unwrap();
25120        assert_eq!(uqs.len(), 1);
25121        assert_eq!(uqs[0].columns, vec!["email"]);
25122        // SQLite gives backend-generated names like sqlite_autoindex_users_1.
25123        assert!(
25124            uqs[0].name.starts_with("sqlite_autoindex_users_"),
25125            "expected sqlite_autoindex_ name; got {}",
25126            uqs[0].name
25127        );
25128    }
25129
25130    #[cfg(feature = "sql-sqlite")]
25131    #[test]
25132    fn list_sql_unique_constraints_composite_table_constraint() {
25133        let conn = make_sql_test_conn();
25134        super::SqlConnection::execute_batch(
25135            &conn,
25136            "CREATE TABLE rolling ( \
25137                year INTEGER, month INTEGER, code TEXT, val REAL, \
25138                UNIQUE (year, month, code) \
25139             );",
25140        )
25141        .unwrap();
25142        let uqs = list_sql_unique_constraints(&conn, "rolling", None).unwrap();
25143        assert_eq!(uqs.len(), 1);
25144        assert_eq!(uqs[0].columns, vec!["year", "month", "code"]);
25145    }
25146
25147    #[cfg(feature = "sql-sqlite")]
25148    #[test]
25149    fn list_sql_unique_constraints_disjoint_from_create_unique_index() {
25150        // Per SQLAlchemy: get_unique_constraints surfaces declared UNIQUE
25151        // constraints (origin='u'); get_indexes surfaces user-created
25152        // CREATE UNIQUE INDEX (origin='c'). The two must be disjoint.
25153        let conn = make_sql_test_conn();
25154        super::SqlConnection::execute_batch(
25155            &conn,
25156            "CREATE TABLE mixed ( \
25157                a INTEGER, \
25158                b TEXT, \
25159                c TEXT, \
25160                UNIQUE (a) \
25161             );",
25162        )
25163        .unwrap();
25164        super::SqlConnection::execute_batch(&conn, "CREATE UNIQUE INDEX idx_mixed_b ON mixed (b);")
25165            .unwrap();
25166
25167        let uqs = list_sql_unique_constraints(&conn, "mixed", None).unwrap();
25168        let idxs = list_sql_indexes(&conn, "mixed", None).unwrap();
25169
25170        // The UNIQUE constraint is in uqs only.
25171        assert_eq!(uqs.len(), 1);
25172        assert_eq!(uqs[0].columns, vec!["a"]);
25173        // The CREATE UNIQUE INDEX is in idxs only.
25174        assert_eq!(idxs.len(), 1);
25175        assert_eq!(idxs[0].name, "idx_mixed_b");
25176        assert!(idxs[0].unique);
25177        assert_eq!(idxs[0].columns, vec!["b"]);
25178
25179        // No overlap by name (uqs uses sqlite_autoindex_, idxs uses idx_).
25180        assert!(!uqs.iter().any(|u| u.name == "idx_mixed_b"));
25181        assert!(!idxs.iter().any(|i| i.name.starts_with("sqlite_autoindex_")));
25182    }
25183
25184    #[cfg(feature = "sql-sqlite")]
25185    #[test]
25186    fn list_sql_unique_constraints_rejects_invalid_table_name() {
25187        let conn = make_sql_test_conn();
25188        let err = list_sql_unique_constraints(&conn, "x; DROP TABLE users", None)
25189            .expect_err("must reject invalid identifier");
25190        assert!(matches!(err, IoError::Sql(msg) if msg.contains("invalid")));
25191    }
25192
25193    #[test]
25194    fn list_sql_unique_constraints_default_impl_returns_empty() {
25195        struct NoIntrospection;
25196        impl super::SqlConnection for NoIntrospection {
25197            fn query(&self, _q: &str, _p: &[Scalar]) -> Result<SqlQueryResult, IoError> {
25198                Ok(SqlQueryResult {
25199                    columns: vec![],
25200                    rows: vec![],
25201                })
25202            }
25203            fn execute_batch(&self, _sql: &str) -> Result<(), IoError> {
25204                Ok(())
25205            }
25206            fn table_exists(&self, _name: &str) -> Result<bool, IoError> {
25207                Ok(false)
25208            }
25209            fn insert_rows(&self, _sql: &str, _rows: &[Vec<Scalar>]) -> Result<(), IoError> {
25210                Ok(())
25211            }
25212            fn dtype_sql(&self, _dtype: DType) -> &'static str {
25213                "TEXT"
25214            }
25215            fn index_dtype_sql(&self, _index: &Index) -> &'static str {
25216                "TEXT"
25217            }
25218        }
25219        assert!(
25220            list_sql_unique_constraints(&NoIntrospection, "anything", None)
25221                .unwrap()
25222                .is_empty()
25223        );
25224    }
25225
25226    #[test]
25227    fn list_sql_unique_constraints_routes_to_backend_override() {
25228        struct MultiSchemaUq;
25229        impl super::SqlConnection for MultiSchemaUq {
25230            fn query(&self, _q: &str, _p: &[Scalar]) -> Result<SqlQueryResult, IoError> {
25231                Ok(SqlQueryResult {
25232                    columns: vec![],
25233                    rows: vec![],
25234                })
25235            }
25236            fn execute_batch(&self, _sql: &str) -> Result<(), IoError> {
25237                Ok(())
25238            }
25239            fn table_exists(&self, _name: &str) -> Result<bool, IoError> {
25240                Ok(false)
25241            }
25242            fn insert_rows(&self, _sql: &str, _rows: &[Vec<Scalar>]) -> Result<(), IoError> {
25243                Ok(())
25244            }
25245            fn dtype_sql(&self, _dtype: DType) -> &'static str {
25246                "TEXT"
25247            }
25248            fn index_dtype_sql(&self, _index: &Index) -> &'static str {
25249                "TEXT"
25250            }
25251            fn supports_schemas(&self) -> bool {
25252                true
25253            }
25254            fn list_unique_constraints(
25255                &self,
25256                table: &str,
25257                schema: Option<&str>,
25258            ) -> Result<Vec<SqlUniqueConstraintSchema>, IoError> {
25259                if table == "users" && schema == Some("public") {
25260                    Ok(vec![SqlUniqueConstraintSchema {
25261                        name: "users_email_key".to_owned(),
25262                        columns: vec!["email".to_owned()],
25263                    }])
25264                } else {
25265                    Ok(vec![])
25266                }
25267            }
25268        }
25269        let conn = MultiSchemaUq;
25270        let uqs = list_sql_unique_constraints(&conn, "users", Some("public")).unwrap();
25271        assert_eq!(uqs.len(), 1);
25272        assert_eq!(uqs[0].name, "users_email_key");
25273        assert!(
25274            list_sql_unique_constraints(&conn, "users", Some("audit"))
25275                .unwrap()
25276                .is_empty()
25277        );
25278    }
25279
25280    // ── sql_table_comment / SqlConnection::table_comment (br-yu3w / fd90.32) ─
25281
25282    #[cfg(feature = "sql-sqlite")]
25283    #[test]
25284    fn sql_table_comment_returns_none_on_sqlite() {
25285        // SQLite has no native table-comment storage; the trait default
25286        // returns None even for a real table.
25287        let conn = make_sql_test_conn();
25288        super::SqlConnection::execute_batch(&conn, "CREATE TABLE t (x INTEGER);").unwrap();
25289        let comment = sql_table_comment(&conn, "t", None).unwrap();
25290        assert!(comment.is_none());
25291    }
25292
25293    #[cfg(feature = "sql-sqlite")]
25294    #[test]
25295    fn sql_table_comment_returns_none_on_sqlite_for_unknown_table() {
25296        let conn = make_sql_test_conn();
25297        let comment = sql_table_comment(&conn, "no_such", None).unwrap();
25298        assert!(comment.is_none());
25299    }
25300
25301    #[test]
25302    fn sql_table_comment_default_impl_returns_none() {
25303        struct NoIntrospection;
25304        impl super::SqlConnection for NoIntrospection {
25305            fn query(&self, _q: &str, _p: &[Scalar]) -> Result<SqlQueryResult, IoError> {
25306                Ok(SqlQueryResult {
25307                    columns: vec![],
25308                    rows: vec![],
25309                })
25310            }
25311            fn execute_batch(&self, _sql: &str) -> Result<(), IoError> {
25312                Ok(())
25313            }
25314            fn table_exists(&self, _name: &str) -> Result<bool, IoError> {
25315                Ok(false)
25316            }
25317            fn insert_rows(&self, _sql: &str, _rows: &[Vec<Scalar>]) -> Result<(), IoError> {
25318                Ok(())
25319            }
25320            fn dtype_sql(&self, _dtype: DType) -> &'static str {
25321                "TEXT"
25322            }
25323            fn index_dtype_sql(&self, _index: &Index) -> &'static str {
25324                "TEXT"
25325            }
25326        }
25327        let conn = NoIntrospection;
25328        assert!(
25329            sql_table_comment(&conn, "anything", None)
25330                .unwrap()
25331                .is_none()
25332        );
25333    }
25334
25335    #[test]
25336    fn sql_table_comment_routes_to_backend_override() {
25337        struct PgLikeStub;
25338        impl super::SqlConnection for PgLikeStub {
25339            fn query(&self, _q: &str, _p: &[Scalar]) -> Result<SqlQueryResult, IoError> {
25340                Ok(SqlQueryResult {
25341                    columns: vec![],
25342                    rows: vec![],
25343                })
25344            }
25345            fn execute_batch(&self, _sql: &str) -> Result<(), IoError> {
25346                Ok(())
25347            }
25348            fn table_exists(&self, _name: &str) -> Result<bool, IoError> {
25349                Ok(false)
25350            }
25351            fn insert_rows(&self, _sql: &str, _rows: &[Vec<Scalar>]) -> Result<(), IoError> {
25352                Ok(())
25353            }
25354            fn dtype_sql(&self, _dtype: DType) -> &'static str {
25355                "TEXT"
25356            }
25357            fn index_dtype_sql(&self, _index: &Index) -> &'static str {
25358                "TEXT"
25359            }
25360            fn supports_schemas(&self) -> bool {
25361                true
25362            }
25363            fn table_comment(
25364                &self,
25365                table: &str,
25366                schema: Option<&str>,
25367            ) -> Result<Option<String>, IoError> {
25368                if table == "users" && schema == Some("public") {
25369                    Ok(Some("Customer accounts table".to_owned()))
25370                } else {
25371                    Ok(None)
25372                }
25373            }
25374        }
25375        let conn = PgLikeStub;
25376        assert_eq!(
25377            sql_table_comment(&conn, "users", Some("public"))
25378                .unwrap()
25379                .as_deref(),
25380            Some("Customer accounts table")
25381        );
25382        assert!(
25383            sql_table_comment(&conn, "users", Some("audit"))
25384                .unwrap()
25385                .is_none()
25386        );
25387        assert!(
25388            sql_table_comment(&conn, "missing", Some("public"))
25389                .unwrap()
25390                .is_none()
25391        );
25392    }
25393
25394    #[test]
25395    fn sql_table_comment_propagates_backend_error() {
25396        struct BrokenIntrospection;
25397        impl super::SqlConnection for BrokenIntrospection {
25398            fn query(&self, _q: &str, _p: &[Scalar]) -> Result<SqlQueryResult, IoError> {
25399                Ok(SqlQueryResult {
25400                    columns: vec![],
25401                    rows: vec![],
25402                })
25403            }
25404            fn execute_batch(&self, _sql: &str) -> Result<(), IoError> {
25405                Ok(())
25406            }
25407            fn table_exists(&self, _name: &str) -> Result<bool, IoError> {
25408                Ok(false)
25409            }
25410            fn insert_rows(&self, _sql: &str, _rows: &[Vec<Scalar>]) -> Result<(), IoError> {
25411                Ok(())
25412            }
25413            fn dtype_sql(&self, _dtype: DType) -> &'static str {
25414                "TEXT"
25415            }
25416            fn index_dtype_sql(&self, _index: &Index) -> &'static str {
25417                "TEXT"
25418            }
25419            fn table_comment(
25420                &self,
25421                _table: &str,
25422                _schema: Option<&str>,
25423            ) -> Result<Option<String>, IoError> {
25424                Err(IoError::Sql(
25425                    "permission denied for pg_description".to_owned(),
25426                ))
25427            }
25428        }
25429        let conn = BrokenIntrospection;
25430        let err =
25431            sql_table_comment(&conn, "anything", None).expect_err("backend error must surface");
25432        assert!(matches!(err, IoError::Sql(msg) if msg.contains("permission denied")));
25433    }
25434
25435    // ── SqlWriteOptions::chunksize (br-ls9z / fd90.33) ────────────────────
25436
25437    #[cfg(feature = "sql-sqlite")]
25438    #[test]
25439    fn write_sql_chunksize_zero_rejected() {
25440        let conn = make_sql_test_conn();
25441        let frame =
25442            fp_frame::DataFrame::from_dict(&["x"], vec![("x", vec![Scalar::Int64(1)])]).unwrap();
25443        let err = write_sql_with_options(
25444            &frame,
25445            &conn,
25446            "t",
25447            &SqlWriteOptions {
25448                if_exists: SqlIfExists::Fail,
25449                index: false,
25450                index_label: None,
25451                schema: None,
25452                dtype: None,
25453                method: SqlInsertMethod::Single,
25454                chunksize: Some(0),
25455            },
25456        )
25457        .expect_err("chunksize=0 must be rejected");
25458        assert!(matches!(err, IoError::Sql(msg) if msg.contains("chunksize")));
25459    }
25460
25461    #[cfg(feature = "sql-sqlite")]
25462    #[test]
25463    fn write_sql_chunksize_none_preserves_single_transaction_semantics() {
25464        // 5 rows, chunksize=None — should round-trip cleanly into one
25465        // transaction (same as before fd90.33 landed).
25466        let conn = make_sql_test_conn();
25467        let frame = fp_frame::DataFrame::from_dict(
25468            &["id"],
25469            vec![(
25470                "id",
25471                vec![
25472                    Scalar::Int64(1),
25473                    Scalar::Int64(2),
25474                    Scalar::Int64(3),
25475                    Scalar::Int64(4),
25476                    Scalar::Int64(5),
25477                ],
25478            )],
25479        )
25480        .unwrap();
25481        write_sql_with_options(
25482            &frame,
25483            &conn,
25484            "no_chunk",
25485            &SqlWriteOptions {
25486                if_exists: SqlIfExists::Fail,
25487                index: false,
25488                index_label: None,
25489                schema: None,
25490                dtype: None,
25491                method: SqlInsertMethod::Single,
25492                chunksize: None,
25493            },
25494        )
25495        .unwrap();
25496        let count =
25497            super::SqlConnection::query(&conn, "SELECT COUNT(*) FROM no_chunk", &[]).unwrap();
25498        assert_eq!(count.rows[0][0], Scalar::Int64(5));
25499    }
25500
25501    #[cfg(feature = "sql-sqlite")]
25502    #[test]
25503    fn write_sql_single_chunksize_round_trips_all_rows() {
25504        // 5 rows with chunksize=2: chunks of (2, 2, 1). All rows must
25505        // round-trip and the table must contain every row regardless
25506        // of how the chunks committed.
25507        let conn = make_sql_test_conn();
25508        let frame = fp_frame::DataFrame::from_dict(
25509            &["id"],
25510            vec![(
25511                "id",
25512                vec![
25513                    Scalar::Int64(1),
25514                    Scalar::Int64(2),
25515                    Scalar::Int64(3),
25516                    Scalar::Int64(4),
25517                    Scalar::Int64(5),
25518                ],
25519            )],
25520        )
25521        .unwrap();
25522        write_sql_with_options(
25523            &frame,
25524            &conn,
25525            "chunked",
25526            &SqlWriteOptions {
25527                if_exists: SqlIfExists::Fail,
25528                index: false,
25529                index_label: None,
25530                schema: None,
25531                dtype: None,
25532                method: SqlInsertMethod::Single,
25533                chunksize: Some(2),
25534            },
25535        )
25536        .unwrap();
25537        let result =
25538            super::SqlConnection::query(&conn, "SELECT id FROM chunked ORDER BY id", &[]).unwrap();
25539        let ids: Vec<i64> = result
25540            .rows
25541            .iter()
25542            .map(|r| match &r[0] {
25543                Scalar::Int64(v) => *v,
25544                other => unreachable!("unexpected scalar: {other:?}"),
25545            })
25546            .collect();
25547        assert_eq!(ids, vec![1, 2, 3, 4, 5]);
25548    }
25549
25550    #[test]
25551    fn write_sql_single_chunksize_dispatches_correct_chunk_counts() {
25552        // Recording stub verifies the chunk boundaries.
25553        use std::cell::RefCell;
25554        struct Recorder {
25555            row_counts: RefCell<Vec<usize>>,
25556        }
25557        impl super::SqlConnection for Recorder {
25558            fn query(&self, _q: &str, _p: &[Scalar]) -> Result<SqlQueryResult, IoError> {
25559                Ok(SqlQueryResult {
25560                    columns: vec![],
25561                    rows: vec![],
25562                })
25563            }
25564            fn execute_batch(&self, _sql: &str) -> Result<(), IoError> {
25565                Ok(())
25566            }
25567            fn table_exists(&self, _name: &str) -> Result<bool, IoError> {
25568                Ok(false)
25569            }
25570            fn insert_rows(&self, _sql: &str, rows: &[Vec<Scalar>]) -> Result<(), IoError> {
25571                self.row_counts.borrow_mut().push(rows.len());
25572                Ok(())
25573            }
25574            fn dtype_sql(&self, _dtype: DType) -> &'static str {
25575                "TEXT"
25576            }
25577            fn index_dtype_sql(&self, _index: &Index) -> &'static str {
25578                "TEXT"
25579            }
25580        }
25581        let conn = Recorder {
25582            row_counts: RefCell::new(vec![]),
25583        };
25584        let frame = fp_frame::DataFrame::from_dict(
25585            &["x"],
25586            vec![(
25587                "x",
25588                vec![
25589                    Scalar::Int64(1),
25590                    Scalar::Int64(2),
25591                    Scalar::Int64(3),
25592                    Scalar::Int64(4),
25593                    Scalar::Int64(5),
25594                ],
25595            )],
25596        )
25597        .unwrap();
25598        write_sql_with_options(
25599            &frame,
25600            &conn,
25601            "chunked",
25602            &SqlWriteOptions {
25603                if_exists: SqlIfExists::Fail,
25604                index: false,
25605                index_label: None,
25606                schema: None,
25607                dtype: None,
25608                method: SqlInsertMethod::Single,
25609                chunksize: Some(2),
25610            },
25611        )
25612        .unwrap();
25613        // Single mode: each chunk submits a slice of rows to insert_rows.
25614        // chunks of size 2, 2, 1 → 3 calls with row counts [2, 2, 1].
25615        assert_eq!(*conn.row_counts.borrow(), vec![2usize, 2, 1]);
25616    }
25617
25618    #[test]
25619    fn write_sql_multi_chunksize_takes_min_with_param_cap() {
25620        // Multi mode with max_param_count=10, ncols=2 → param chunk = 5
25621        // rows. chunksize=3 should win (min(3, 5) = 3).
25622        // Multi mode flattens each chunk to a single insert_rows call
25623        // where rows[0].len() = chunk_size * ncols.
25624        use std::cell::RefCell;
25625        struct ParamCapRecorder {
25626            row_counts: RefCell<Vec<usize>>,
25627        }
25628        impl super::SqlConnection for ParamCapRecorder {
25629            fn query(&self, _q: &str, _p: &[Scalar]) -> Result<SqlQueryResult, IoError> {
25630                Ok(SqlQueryResult {
25631                    columns: vec![],
25632                    rows: vec![],
25633                })
25634            }
25635            fn execute_batch(&self, _sql: &str) -> Result<(), IoError> {
25636                Ok(())
25637            }
25638            fn table_exists(&self, _name: &str) -> Result<bool, IoError> {
25639                Ok(false)
25640            }
25641            fn insert_rows(&self, _sql: &str, rows: &[Vec<Scalar>]) -> Result<(), IoError> {
25642                // Multi mode passes a single flattened "row" per chunk.
25643                self.row_counts
25644                    .borrow_mut()
25645                    .push(rows.first().map_or(0, std::vec::Vec::len));
25646                Ok(())
25647            }
25648            fn dtype_sql(&self, _dtype: DType) -> &'static str {
25649                "TEXT"
25650            }
25651            fn index_dtype_sql(&self, _index: &Index) -> &'static str {
25652                "TEXT"
25653            }
25654            fn max_param_count(&self) -> Option<usize> {
25655                Some(10)
25656            }
25657        }
25658        let conn = ParamCapRecorder {
25659            row_counts: RefCell::new(vec![]),
25660        };
25661        let frame = fp_frame::DataFrame::from_dict(
25662            &["a", "b"],
25663            vec![
25664                (
25665                    "a",
25666                    vec![
25667                        Scalar::Int64(1),
25668                        Scalar::Int64(2),
25669                        Scalar::Int64(3),
25670                        Scalar::Int64(4),
25671                        Scalar::Int64(5),
25672                    ],
25673                ),
25674                (
25675                    "b",
25676                    vec![
25677                        Scalar::Int64(10),
25678                        Scalar::Int64(20),
25679                        Scalar::Int64(30),
25680                        Scalar::Int64(40),
25681                        Scalar::Int64(50),
25682                    ],
25683                ),
25684            ],
25685        )
25686        .unwrap();
25687        write_sql_with_options(
25688            &frame,
25689            &conn,
25690            "chunked",
25691            &SqlWriteOptions {
25692                if_exists: SqlIfExists::Fail,
25693                index: false,
25694                index_label: None,
25695                schema: None,
25696                dtype: None,
25697                method: SqlInsertMethod::Multi,
25698                chunksize: Some(3),
25699            },
25700        )
25701        .unwrap();
25702        // chunksize=3 wins over param cap (5). 5 rows / 3 per chunk = 2
25703        // chunks (3, 2). Flat scalars per chunk: 3*2=6 then 2*2=4.
25704        assert_eq!(*conn.row_counts.borrow(), vec![6usize, 4]);
25705    }
25706
25707    #[test]
25708    fn write_sql_multi_chunksize_param_cap_wins_when_smaller() {
25709        // Param cap = 4 (ncols=2 → 2 rows/chunk). chunksize=10 (loose).
25710        // Effective chunk = min(10, 2) = 2.
25711        use std::cell::RefCell;
25712        struct TightCap {
25713            row_counts: RefCell<Vec<usize>>,
25714        }
25715        impl super::SqlConnection for TightCap {
25716            fn query(&self, _q: &str, _p: &[Scalar]) -> Result<SqlQueryResult, IoError> {
25717                Ok(SqlQueryResult {
25718                    columns: vec![],
25719                    rows: vec![],
25720                })
25721            }
25722            fn execute_batch(&self, _sql: &str) -> Result<(), IoError> {
25723                Ok(())
25724            }
25725            fn table_exists(&self, _name: &str) -> Result<bool, IoError> {
25726                Ok(false)
25727            }
25728            fn insert_rows(&self, _sql: &str, rows: &[Vec<Scalar>]) -> Result<(), IoError> {
25729                self.row_counts
25730                    .borrow_mut()
25731                    .push(rows.first().map_or(0, std::vec::Vec::len));
25732                Ok(())
25733            }
25734            fn dtype_sql(&self, _dtype: DType) -> &'static str {
25735                "TEXT"
25736            }
25737            fn index_dtype_sql(&self, _index: &Index) -> &'static str {
25738                "TEXT"
25739            }
25740            fn max_param_count(&self) -> Option<usize> {
25741                Some(4)
25742            }
25743        }
25744        let conn = TightCap {
25745            row_counts: RefCell::new(vec![]),
25746        };
25747        let frame = fp_frame::DataFrame::from_dict(
25748            &["a", "b"],
25749            vec![
25750                (
25751                    "a",
25752                    vec![
25753                        Scalar::Int64(1),
25754                        Scalar::Int64(2),
25755                        Scalar::Int64(3),
25756                        Scalar::Int64(4),
25757                        Scalar::Int64(5),
25758                    ],
25759                ),
25760                (
25761                    "b",
25762                    vec![
25763                        Scalar::Int64(10),
25764                        Scalar::Int64(20),
25765                        Scalar::Int64(30),
25766                        Scalar::Int64(40),
25767                        Scalar::Int64(50),
25768                    ],
25769                ),
25770            ],
25771        )
25772        .unwrap();
25773        write_sql_with_options(
25774            &frame,
25775            &conn,
25776            "chunked",
25777            &SqlWriteOptions {
25778                if_exists: SqlIfExists::Fail,
25779                index: false,
25780                index_label: None,
25781                schema: None,
25782                dtype: None,
25783                method: SqlInsertMethod::Multi,
25784                chunksize: Some(10),
25785            },
25786        )
25787        .unwrap();
25788        // 5 rows / 2 per chunk = 3 chunks (2, 2, 1). Flat scalars: 4, 4, 2.
25789        assert_eq!(*conn.row_counts.borrow(), vec![4usize, 4, 2]);
25790    }
25791
25792    // ── SqlReadOptions::columns (br-d3e9 / fd90.34) ──────────────────────
25793
25794    #[cfg(feature = "sql-sqlite")]
25795    #[test]
25796    fn read_sql_table_with_options_columns_none_selects_all() {
25797        let conn = make_sql_test_conn();
25798        super::SqlConnection::execute_batch(
25799            &conn,
25800            "CREATE TABLE projection_default (a INTEGER, b TEXT, c REAL);",
25801        )
25802        .unwrap();
25803        super::SqlConnection::execute_batch(
25804            &conn,
25805            "INSERT INTO projection_default VALUES (1, 'x', 1.5);",
25806        )
25807        .unwrap();
25808        let frame = read_sql_table_with_options(
25809            &conn,
25810            "projection_default",
25811            &SqlReadOptions {
25812                columns: None,
25813                ..Default::default()
25814            },
25815        )
25816        .unwrap();
25817        assert_eq!(frame.column_names(), vec!["a", "b", "c"]);
25818    }
25819
25820    #[cfg(feature = "sql-sqlite")]
25821    #[test]
25822    fn read_sql_table_with_options_columns_projects_subset() {
25823        let conn = make_sql_test_conn();
25824        super::SqlConnection::execute_batch(
25825            &conn,
25826            "CREATE TABLE projection (id INTEGER, name TEXT, ts TEXT, value REAL);",
25827        )
25828        .unwrap();
25829        super::SqlConnection::execute_batch(
25830            &conn,
25831            "INSERT INTO projection VALUES (1, 'a', '2024-01-01', 1.5), \
25832                                            (2, 'b', '2024-01-02', 2.5);",
25833        )
25834        .unwrap();
25835        let frame = read_sql_table_with_options(
25836            &conn,
25837            "projection",
25838            &SqlReadOptions {
25839                columns: Some(vec!["id".to_owned(), "name".to_owned()]),
25840                ..Default::default()
25841            },
25842        )
25843        .unwrap();
25844        // Only id + name, in that order.
25845        assert_eq!(frame.column_names(), vec!["id", "name"]);
25846        assert_eq!(frame.column("id").unwrap().values()[0], Scalar::Int64(1));
25847        assert_eq!(
25848            frame.column("name").unwrap().values()[0],
25849            Scalar::Utf8("a".into())
25850        );
25851    }
25852
25853    #[cfg(feature = "sql-sqlite")]
25854    #[test]
25855    fn read_sql_table_with_options_columns_preserves_specified_order() {
25856        // pandas: pd.read_sql_table(t, con, columns=['c', 'a']) →
25857        // returns ['c', 'a'] in that exact order, NOT alphabetical.
25858        let conn = make_sql_test_conn();
25859        super::SqlConnection::execute_batch(
25860            &conn,
25861            "CREATE TABLE ordered_proj (a INT, b INT, c INT);",
25862        )
25863        .unwrap();
25864        super::SqlConnection::execute_batch(&conn, "INSERT INTO ordered_proj VALUES (1, 2, 3);")
25865            .unwrap();
25866        let frame = read_sql_table_with_options(
25867            &conn,
25868            "ordered_proj",
25869            &SqlReadOptions {
25870                columns: Some(vec!["c".to_owned(), "a".to_owned()]),
25871                ..Default::default()
25872            },
25873        )
25874        .unwrap();
25875        assert_eq!(frame.column_names(), vec!["c", "a"]);
25876    }
25877
25878    #[cfg(feature = "sql-sqlite")]
25879    #[test]
25880    fn read_sql_table_with_options_columns_empty_vec_rejected() {
25881        let conn = make_sql_test_conn();
25882        super::SqlConnection::execute_batch(&conn, "CREATE TABLE t (x INTEGER);").unwrap();
25883        let err = read_sql_table_with_options(
25884            &conn,
25885            "t",
25886            &SqlReadOptions {
25887                columns: Some(vec![]),
25888                ..Default::default()
25889            },
25890        )
25891        .expect_err("empty columns must be rejected");
25892        assert!(matches!(err, IoError::Sql(msg) if msg.contains("columns must be non-empty")));
25893    }
25894
25895    #[cfg(feature = "sql-sqlite")]
25896    #[test]
25897    fn read_sql_table_with_options_columns_invalid_name_rejected() {
25898        let conn = make_sql_test_conn();
25899        super::SqlConnection::execute_batch(&conn, "CREATE TABLE t (x INTEGER);").unwrap();
25900        let err = read_sql_table_with_options(
25901            &conn,
25902            "t",
25903            &SqlReadOptions {
25904                columns: Some(vec!["x; DROP TABLE t".to_owned()]),
25905                ..Default::default()
25906            },
25907        )
25908        .expect_err("invalid column name must be rejected");
25909        assert!(matches!(err, IoError::Sql(msg) if msg.contains("invalid")));
25910    }
25911
25912    #[cfg(feature = "sql-sqlite")]
25913    #[test]
25914    fn read_sql_table_with_options_columns_combines_with_parse_dates() {
25915        // columns + parse_dates: project a subset, then date-coerce.
25916        let conn = make_sql_test_conn();
25917        super::SqlConnection::execute_batch(
25918            &conn,
25919            "CREATE TABLE events (id INT, ts TEXT, note TEXT);",
25920        )
25921        .unwrap();
25922        super::SqlConnection::execute_batch(
25923            &conn,
25924            "INSERT INTO events VALUES (1, '2024-01-15', 'launched');",
25925        )
25926        .unwrap();
25927        let frame = read_sql_table_with_options(
25928            &conn,
25929            "events",
25930            &SqlReadOptions {
25931                columns: Some(vec!["id".to_owned(), "ts".to_owned()]),
25932                index_col: None,
25933                parse_dates: Some(vec!["ts".to_owned()]),
25934                ..Default::default()
25935            },
25936        )
25937        .unwrap();
25938        // Only id + ts surfaced; ts was reformatted by parse_dates
25939        // (the project-then-coerce path emits the canonical
25940        // 'YYYY-MM-DD HH:MM:SS' shape via Scalar::Utf8).
25941        assert_eq!(frame.column_names(), vec!["id", "ts"]);
25942        assert_eq!(
25943            frame.column("ts").unwrap().values()[0],
25944            Scalar::Utf8("2024-01-15 00:00:00".to_owned())
25945        );
25946    }
25947
25948    #[cfg(feature = "sql-sqlite")]
25949    #[test]
25950    fn read_sql_table_chunks_with_options_columns_projects_before_chunking() {
25951        let conn = make_sql_test_conn();
25952        super::SqlConnection::execute_batch(
25953            &conn,
25954            "CREATE TABLE chunk_projection (id INTEGER, name TEXT, hidden REAL);",
25955        )
25956        .unwrap();
25957        super::SqlConnection::execute_batch(
25958            &conn,
25959            "INSERT INTO chunk_projection VALUES \
25960                (1, 'a', 10.0), \
25961                (2, 'b', 20.0), \
25962                (3, 'c', 30.0);",
25963        )
25964        .unwrap();
25965
25966        let chunks: Vec<DataFrame> = read_sql_table_chunks_with_options(
25967            &conn,
25968            "chunk_projection",
25969            &SqlReadOptions {
25970                columns: Some(vec!["name".to_owned(), "id".to_owned()]),
25971                ..Default::default()
25972            },
25973            2,
25974        )
25975        .unwrap()
25976        .collect::<Result<Vec<_>, _>>()
25977        .unwrap();
25978
25979        assert_eq!(chunks.len(), 2);
25980        assert_eq!(chunks[0].column_names(), vec!["name", "id"]);
25981        assert_eq!(chunks[1].column_names(), vec!["name", "id"]);
25982        assert_eq!(
25983            chunks[0].column("name").unwrap().values(),
25984            &[Scalar::Utf8("a".into()), Scalar::Utf8("b".into())]
25985        );
25986        assert_eq!(
25987            chunks[1].column("id").unwrap().values(),
25988            &[Scalar::Int64(3)]
25989        );
25990        assert!(chunks[0].column("hidden").is_none());
25991    }
25992
25993    #[test]
25994    fn read_sql_table_chunks_with_options_schema_projects_before_chunking() {
25995        use std::cell::RefCell;
25996
25997        struct MultiSchemaProjectedChunks {
25998            queries: RefCell<Vec<String>>,
25999        }
26000
26001        impl super::SqlConnection for MultiSchemaProjectedChunks {
26002            fn query(&self, query: &str, _params: &[Scalar]) -> Result<SqlQueryResult, IoError> {
26003                self.queries.borrow_mut().push(query.to_owned());
26004                Ok(SqlQueryResult {
26005                    columns: vec!["name".to_owned(), "id".to_owned()],
26006                    rows: vec![
26007                        vec![Scalar::Utf8("a".to_owned()), Scalar::Int64(1)],
26008                        vec![Scalar::Utf8("b".to_owned()), Scalar::Int64(2)],
26009                        vec![Scalar::Utf8("c".to_owned()), Scalar::Int64(3)],
26010                    ],
26011                })
26012            }
26013
26014            fn execute_batch(&self, _sql: &str) -> Result<(), IoError> {
26015                Ok(())
26016            }
26017
26018            fn table_exists(&self, _name: &str) -> Result<bool, IoError> {
26019                Ok(false)
26020            }
26021
26022            fn insert_rows(&self, _sql: &str, _rows: &[Vec<Scalar>]) -> Result<(), IoError> {
26023                Ok(())
26024            }
26025
26026            fn dtype_sql(&self, _dtype: DType) -> &'static str {
26027                "TEXT"
26028            }
26029
26030            fn index_dtype_sql(&self, _index: &Index) -> &'static str {
26031                "TEXT"
26032            }
26033
26034            fn supports_schemas(&self) -> bool {
26035                true
26036            }
26037        }
26038
26039        let conn = MultiSchemaProjectedChunks {
26040            queries: RefCell::new(Vec::new()),
26041        };
26042
26043        let chunks: Vec<DataFrame> = super::read_sql_table_chunks_with_options(
26044            &conn,
26045            "events",
26046            &SqlReadOptions {
26047                schema: Some("analytics".to_owned()),
26048                columns: Some(vec!["name".to_owned(), "id".to_owned()]),
26049                ..Default::default()
26050            },
26051            2,
26052        )
26053        .unwrap()
26054        .collect::<Result<Vec<_>, _>>()
26055        .unwrap();
26056
26057        assert_eq!(
26058            conn.queries.borrow().as_slice(),
26059            &["SELECT \"name\", \"id\" FROM \"analytics\".\"events\"".to_owned()]
26060        );
26061        assert_eq!(chunks.len(), 2);
26062        assert_eq!(chunks[0].column_names(), vec!["name", "id"]);
26063        assert_eq!(chunks[1].column_names(), vec!["name", "id"]);
26064        assert_eq!(
26065            chunks[0].index().labels(),
26066            &[IndexLabel::Int64(0), IndexLabel::Int64(1)]
26067        );
26068        assert_eq!(chunks[1].index().labels(), &[IndexLabel::Int64(0)]);
26069        assert_eq!(
26070            chunks[0].column("name").unwrap().values(),
26071            &[Scalar::Utf8("a".into()), Scalar::Utf8("b".into())]
26072        );
26073        assert_eq!(
26074            chunks[1].column("id").unwrap().values(),
26075            &[Scalar::Int64(3)]
26076        );
26077    }
26078
26079    // ── SqlColumnSchema::comment (br-cfld / fd90.35) ─────────────────────
26080
26081    #[cfg(feature = "sql-sqlite")]
26082    #[test]
26083    fn sql_table_schema_comment_is_none_on_sqlite() {
26084        // SQLite has no column-comment storage; the rusqlite override
26085        // must always emit comment=None even when the table is real.
26086        let conn = make_sql_test_conn();
26087        super::SqlConnection::execute_batch(&conn, "CREATE TABLE labeled (id INTEGER, name TEXT);")
26088            .unwrap();
26089        let schema = sql_table_schema(&conn, "labeled", None).unwrap().unwrap();
26090        for col in &schema.columns {
26091            assert!(
26092                col.comment.is_none(),
26093                "SQLite should report no column comment; got {:?} on {}",
26094                col.comment,
26095                col.name
26096            );
26097        }
26098    }
26099
26100    #[test]
26101    fn sql_table_schema_comment_routes_to_backend_override() {
26102        // PG-like backend stub returns explicit comment text per column.
26103        struct PgLikeWithComments;
26104        impl super::SqlConnection for PgLikeWithComments {
26105            fn query(&self, _q: &str, _p: &[Scalar]) -> Result<SqlQueryResult, IoError> {
26106                Ok(SqlQueryResult {
26107                    columns: vec![],
26108                    rows: vec![],
26109                })
26110            }
26111            fn execute_batch(&self, _sql: &str) -> Result<(), IoError> {
26112                Ok(())
26113            }
26114            fn table_exists(&self, _name: &str) -> Result<bool, IoError> {
26115                Ok(false)
26116            }
26117            fn insert_rows(&self, _sql: &str, _rows: &[Vec<Scalar>]) -> Result<(), IoError> {
26118                Ok(())
26119            }
26120            fn dtype_sql(&self, _dtype: DType) -> &'static str {
26121                "TEXT"
26122            }
26123            fn index_dtype_sql(&self, _index: &Index) -> &'static str {
26124                "TEXT"
26125            }
26126            fn supports_schemas(&self) -> bool {
26127                true
26128            }
26129            fn table_schema(
26130                &self,
26131                table: &str,
26132                _schema: Option<&str>,
26133            ) -> Result<Option<SqlTableSchema>, IoError> {
26134                if table == "users" {
26135                    Ok(Some(SqlTableSchema {
26136                        table_name: "users".to_owned(),
26137                        columns: vec![
26138                            SqlColumnSchema {
26139                                name: "id".to_owned(),
26140                                declared_type: Some("BIGINT".to_owned()),
26141                                nullable: false,
26142                                default_value: None,
26143                                primary_key_ordinal: Some(0),
26144                                comment: Some("Surrogate primary key".to_owned()),
26145                                autoincrement: false,
26146                            },
26147                            SqlColumnSchema {
26148                                name: "email".to_owned(),
26149                                declared_type: Some("TEXT".to_owned()),
26150                                nullable: false,
26151                                default_value: None,
26152                                primary_key_ordinal: None,
26153                                comment: Some("Login identifier".to_owned()),
26154                                autoincrement: false,
26155                            },
26156                            SqlColumnSchema {
26157                                name: "name".to_owned(),
26158                                declared_type: Some("TEXT".to_owned()),
26159                                nullable: true,
26160                                default_value: None,
26161                                primary_key_ordinal: None,
26162                                comment: None,
26163                                autoincrement: false,
26164                            },
26165                        ],
26166                    }))
26167                } else {
26168                    Ok(None)
26169                }
26170            }
26171        }
26172        let conn = PgLikeWithComments;
26173        let schema = sql_table_schema(&conn, "users", None).unwrap().unwrap();
26174        let id = schema.column("id").unwrap();
26175        assert_eq!(id.comment.as_deref(), Some("Surrogate primary key"));
26176        let email = schema.column("email").unwrap();
26177        assert_eq!(email.comment.as_deref(), Some("Login identifier"));
26178        // Mixed: some columns may have no comment even on PG.
26179        let name = schema.column("name").unwrap();
26180        assert!(name.comment.is_none());
26181    }
26182
26183    // ── SqlReadOptions::index_col (br-c1h9 / fd90.36) ─────────────────────
26184
26185    #[cfg(feature = "sql-sqlite")]
26186    #[test]
26187    fn read_sql_with_options_index_col_none_keeps_range_index() {
26188        let conn = make_sql_test_conn();
26189        super::SqlConnection::execute_batch(&conn, "CREATE TABLE keyed (id INTEGER, val INTEGER);")
26190            .unwrap();
26191        super::SqlConnection::execute_batch(&conn, "INSERT INTO keyed VALUES (1, 10), (2, 20);")
26192            .unwrap();
26193        let frame = read_sql_with_options(
26194            &conn,
26195            "SELECT id, val FROM keyed ORDER BY id",
26196            &SqlReadOptions {
26197                index_col: None,
26198                ..Default::default()
26199            },
26200        )
26201        .unwrap();
26202        // Default RangeIndex: labels 0, 1.
26203        assert_eq!(frame.index().len(), 2);
26204        assert_eq!(frame.column_names(), vec!["id", "val"]);
26205    }
26206
26207    #[cfg(feature = "sql-sqlite")]
26208    #[test]
26209    fn read_sql_with_options_index_col_promotes_named_column() {
26210        let conn = make_sql_test_conn();
26211        super::SqlConnection::execute_batch(&conn, "CREATE TABLE keyed (id INTEGER, val INTEGER);")
26212            .unwrap();
26213        super::SqlConnection::execute_batch(&conn, "INSERT INTO keyed VALUES (10, 1), (20, 2);")
26214            .unwrap();
26215        let frame = read_sql_with_options(
26216            &conn,
26217            "SELECT id, val FROM keyed ORDER BY id",
26218            &SqlReadOptions {
26219                index_col: Some("id".to_owned()),
26220                ..Default::default()
26221            },
26222        )
26223        .unwrap();
26224        // 'id' removed from columns, used as index labels.
26225        assert_eq!(frame.column_names(), vec!["val"]);
26226        assert_eq!(frame.index().len(), 2);
26227        // Index labels should be the id values (10, 20).
26228        let labels: Vec<i64> = frame
26229            .index()
26230            .labels()
26231            .iter()
26232            .filter_map(|l| match l {
26233                IndexLabel::Int64(v) => Some(*v),
26234                _ => None,
26235            })
26236            .collect();
26237        assert_eq!(labels, vec![10, 20]);
26238    }
26239
26240    #[cfg(feature = "sql-sqlite")]
26241    #[test]
26242    fn read_sql_with_options_index_col_missing_column_errors() {
26243        let conn = make_sql_test_conn();
26244        super::SqlConnection::execute_batch(&conn, "CREATE TABLE t (a INTEGER);").unwrap();
26245        super::SqlConnection::execute_batch(&conn, "INSERT INTO t VALUES (1);").unwrap();
26246        let err = read_sql_with_options(
26247            &conn,
26248            "SELECT a FROM t",
26249            &SqlReadOptions {
26250                index_col: Some("nonexistent".to_owned()),
26251                ..Default::default()
26252            },
26253        )
26254        .expect_err("missing index_col must error");
26255        assert!(matches!(err, IoError::Sql(msg) if msg.contains("not present")));
26256    }
26257
26258    #[cfg(feature = "sql-sqlite")]
26259    #[test]
26260    fn read_sql_with_options_index_col_empty_string_rejected() {
26261        let conn = make_sql_test_conn();
26262        super::SqlConnection::execute_batch(&conn, "CREATE TABLE t (a INTEGER);").unwrap();
26263        let err = read_sql_with_options(
26264            &conn,
26265            "SELECT a FROM t",
26266            &SqlReadOptions {
26267                index_col: Some(String::new()),
26268                ..Default::default()
26269            },
26270        )
26271        .expect_err("empty index_col must be rejected");
26272        assert!(matches!(err, IoError::Sql(msg) if msg.contains("empty string")));
26273    }
26274
26275    #[cfg(feature = "sql-sqlite")]
26276    #[test]
26277    fn read_sql_explicit_index_col_empty_string_rejected_across_entrypoints() {
26278        fn assert_empty_index_col(err: IoError) {
26279            assert!(
26280                matches!(err, IoError::Sql(ref msg) if msg.contains("empty string")),
26281                "expected empty index_col error, got {err:?}"
26282            );
26283        }
26284
26285        let conn = make_sql_test_conn();
26286        super::SqlConnection::execute_batch(
26287            &conn,
26288            "CREATE TABLE explicit_idx (a INTEGER, b TEXT);
26289             INSERT INTO explicit_idx VALUES (1, 'x'), (2, 'y');",
26290        )
26291        .unwrap();
26292
26293        assert_empty_index_col(
26294            read_sql_with_index_col(&conn, "SELECT a, b FROM explicit_idx", Some(""))
26295                .expect_err("empty explicit read_sql index_col must be rejected"),
26296        );
26297        assert_empty_index_col(
26298            read_sql_query_with_options_and_index_col(
26299                &conn,
26300                "SELECT a, b FROM explicit_idx",
26301                &SqlReadOptions::default(),
26302                Some(""),
26303            )
26304            .expect_err("empty explicit read_sql_query index_col must be rejected"),
26305        );
26306        assert_empty_index_col(
26307            read_sql_query_chunks_with_options_and_index_col(
26308                &conn,
26309                "SELECT a, b FROM explicit_idx",
26310                &SqlReadOptions::default(),
26311                Some(""),
26312                1,
26313            )
26314            .expect_err("empty explicit query chunk index_col must be rejected"),
26315        );
26316        assert_empty_index_col(
26317            read_sql_table_with_index_col(&conn, "explicit_idx", Some(""))
26318                .expect_err("empty explicit table index_col must be rejected"),
26319        );
26320        assert_empty_index_col(
26321            read_sql_table_with_options_and_index_col(
26322                &conn,
26323                "explicit_idx",
26324                &SqlReadOptions::default(),
26325                Some(""),
26326            )
26327            .expect_err("empty explicit table options index_col must be rejected"),
26328        );
26329        assert_empty_index_col(
26330            read_sql_table_columns_with_index_col(&conn, "explicit_idx", &["a"], Some(""))
26331                .expect_err("empty explicit table-columns index_col must be rejected"),
26332        );
26333        assert_empty_index_col(
26334            read_sql_table_columns_chunks_with_index_col(
26335                &conn,
26336                "explicit_idx",
26337                &["a"],
26338                Some(""),
26339                1,
26340            )
26341            .expect_err("empty explicit table-columns chunk index_col must be rejected"),
26342        );
26343    }
26344
26345    #[cfg(feature = "sql-sqlite")]
26346    #[test]
26347    fn read_sql_table_with_options_index_col_combines_with_columns_projection() {
26348        // columns + index_col: project ['id', 'val'], promote 'id' to
26349        // index, leaving only 'val' as a data column.
26350        let conn = make_sql_test_conn();
26351        super::SqlConnection::execute_batch(
26352            &conn,
26353            "CREATE TABLE wide (id INTEGER, val INTEGER, ts TEXT, note TEXT);",
26354        )
26355        .unwrap();
26356        super::SqlConnection::execute_batch(
26357            &conn,
26358            "INSERT INTO wide VALUES (5, 100, 't1', 'first');",
26359        )
26360        .unwrap();
26361        let frame = read_sql_table_with_options(
26362            &conn,
26363            "wide",
26364            &SqlReadOptions {
26365                columns: Some(vec!["id".to_owned(), "val".to_owned()]),
26366                index_col: Some("id".to_owned()),
26367                ..Default::default()
26368            },
26369        )
26370        .unwrap();
26371        assert_eq!(frame.column_names(), vec!["val"]);
26372        let labels: Vec<i64> = frame
26373            .index()
26374            .labels()
26375            .iter()
26376            .filter_map(|l| match l {
26377                IndexLabel::Int64(v) => Some(*v),
26378                _ => None,
26379            })
26380            .collect();
26381        assert_eq!(labels, vec![5]);
26382    }
26383
26384    #[cfg(feature = "sql-sqlite")]
26385    #[test]
26386    fn read_sql_table_with_options_columns_auto_project_index_col() {
26387        let conn = make_sql_test_conn();
26388        super::SqlConnection::execute_batch(
26389            &conn,
26390            "CREATE TABLE projected_index (id INTEGER, val TEXT, hidden TEXT);",
26391        )
26392        .unwrap();
26393        super::SqlConnection::execute_batch(
26394            &conn,
26395            "INSERT INTO projected_index VALUES (10, 'a', 'x'), (20, 'b', 'y');",
26396        )
26397        .unwrap();
26398
26399        let frame = read_sql_table_with_options(
26400            &conn,
26401            "projected_index",
26402            &SqlReadOptions {
26403                columns: Some(vec!["val".to_owned()]),
26404                index_col: Some("id".to_owned()),
26405                ..Default::default()
26406            },
26407        )
26408        .unwrap();
26409
26410        assert_eq!(frame.column_names(), vec!["val"]);
26411        assert!(frame.column("id").is_none());
26412        assert!(frame.column("hidden").is_none());
26413        assert_eq!(
26414            frame.index().labels(),
26415            &[IndexLabel::Int64(10), IndexLabel::Int64(20)]
26416        );
26417        assert_eq!(
26418            frame.column("val").unwrap().values(),
26419            &[Scalar::Utf8("a".into()), Scalar::Utf8("b".into())]
26420        );
26421    }
26422
26423    #[cfg(feature = "sql-sqlite")]
26424    #[test]
26425    fn read_sql_table_chunks_with_options_columns_auto_project_index_col() {
26426        let conn = make_sql_test_conn();
26427        super::SqlConnection::execute_batch(
26428            &conn,
26429            "CREATE TABLE projected_index_chunks (id INTEGER, val TEXT, hidden TEXT);",
26430        )
26431        .unwrap();
26432        super::SqlConnection::execute_batch(
26433            &conn,
26434            "INSERT INTO projected_index_chunks VALUES \
26435                (10, 'a', 'x'), \
26436                (20, 'b', 'y'), \
26437                (30, 'c', 'z');",
26438        )
26439        .unwrap();
26440
26441        let chunks: Vec<DataFrame> = read_sql_table_chunks_with_options_and_index_col(
26442            &conn,
26443            "projected_index_chunks",
26444            &SqlReadOptions {
26445                columns: Some(vec!["val".to_owned()]),
26446                ..Default::default()
26447            },
26448            Some("id"),
26449            2,
26450        )
26451        .unwrap()
26452        .collect::<Result<Vec<_>, _>>()
26453        .unwrap();
26454
26455        assert_eq!(chunks.len(), 2);
26456        assert_eq!(chunks[0].column_names(), vec!["val"]);
26457        assert!(chunks[0].column("id").is_none());
26458        assert!(chunks[0].column("hidden").is_none());
26459        assert_eq!(
26460            chunks[0].index().labels(),
26461            &[IndexLabel::Int64(10), IndexLabel::Int64(20)]
26462        );
26463        assert_eq!(chunks[1].index().labels(), &[IndexLabel::Int64(30)]);
26464        assert_eq!(
26465            chunks[1].column("val").unwrap().values(),
26466            &[Scalar::Utf8("c".into())]
26467        );
26468    }
26469
26470    #[cfg(feature = "sql-sqlite")]
26471    #[test]
26472    fn read_sql_table_with_options_and_index_col_explicit_arg_wins_over_options() {
26473        // Both options.index_col=Some('a') and explicit index_col=Some('b').
26474        // The explicit arg must win — options.index_col is silently
26475        // overridden to avoid double-promotion.
26476        let conn = make_sql_test_conn();
26477        super::SqlConnection::execute_batch(
26478            &conn,
26479            "CREATE TABLE both (a INTEGER, b INTEGER, c TEXT);",
26480        )
26481        .unwrap();
26482        super::SqlConnection::execute_batch(
26483            &conn,
26484            "INSERT INTO both VALUES (1, 100, 'x'), (2, 200, 'y');",
26485        )
26486        .unwrap();
26487        let frame = read_sql_table_with_options_and_index_col(
26488            &conn,
26489            "both",
26490            &SqlReadOptions {
26491                index_col: Some("a".to_owned()),
26492                ..Default::default()
26493            },
26494            Some("b"),
26495        )
26496        .unwrap();
26497        // 'b' is promoted to index, 'a' and 'c' remain as columns.
26498        assert_eq!(frame.column_names(), vec!["a", "c"]);
26499        let labels: Vec<i64> = frame
26500            .index()
26501            .labels()
26502            .iter()
26503            .filter_map(|l| match l {
26504                IndexLabel::Int64(v) => Some(*v),
26505                _ => None,
26506            })
26507            .collect();
26508        assert_eq!(labels, vec![100, 200]);
26509    }
26510
26511    // ── SqlColumnSchema::autoincrement (br-bkl2 / fd90.37) ────────────────
26512
26513    #[cfg(feature = "sql-sqlite")]
26514    #[test]
26515    fn sql_table_schema_autoincrement_detected_on_integer_primary_key() {
26516        // SQLite rowid-alias rule: INTEGER PRIMARY KEY is an
26517        // auto-incrementing rowid alias.
26518        let conn = make_sql_test_conn();
26519        super::SqlConnection::execute_batch(
26520            &conn,
26521            "CREATE TABLE auto_a (id INTEGER PRIMARY KEY, name TEXT);",
26522        )
26523        .unwrap();
26524        let schema = sql_table_schema(&conn, "auto_a", None).unwrap().unwrap();
26525        let id = schema.column("id").unwrap();
26526        assert!(
26527            id.autoincrement,
26528            "INTEGER PRIMARY KEY must be autoincrement; got {id:?}"
26529        );
26530        let name = schema.column("name").unwrap();
26531        assert!(
26532            !name.autoincrement,
26533            "non-PK column must not be autoincrement"
26534        );
26535    }
26536
26537    #[cfg(feature = "sql-sqlite")]
26538    #[test]
26539    fn sql_table_schema_autoincrement_detected_with_explicit_keyword() {
26540        // The explicit AUTOINCREMENT keyword affects rowid reuse, not
26541        // the autoincrement property pandas surfaces.
26542        let conn = make_sql_test_conn();
26543        super::SqlConnection::execute_batch(
26544            &conn,
26545            "CREATE TABLE auto_b (id INTEGER PRIMARY KEY AUTOINCREMENT, val TEXT);",
26546        )
26547        .unwrap();
26548        let schema = sql_table_schema(&conn, "auto_b", None).unwrap().unwrap();
26549        let id = schema.column("id").unwrap();
26550        assert!(id.autoincrement);
26551    }
26552
26553    #[cfg(feature = "sql-sqlite")]
26554    #[test]
26555    fn sql_table_schema_autoincrement_not_set_for_text_primary_key() {
26556        // TEXT PRIMARY KEY is NOT a rowid alias; not auto-incrementing.
26557        let conn = make_sql_test_conn();
26558        super::SqlConnection::execute_batch(
26559            &conn,
26560            "CREATE TABLE text_pk (code TEXT PRIMARY KEY, name TEXT);",
26561        )
26562        .unwrap();
26563        let schema = sql_table_schema(&conn, "text_pk", None).unwrap().unwrap();
26564        let code = schema.column("code").unwrap();
26565        assert!(
26566            !code.autoincrement,
26567            "TEXT PRIMARY KEY is not autoincrement; got {code:?}"
26568        );
26569    }
26570
26571    #[cfg(feature = "sql-sqlite")]
26572    #[test]
26573    fn sql_table_schema_autoincrement_not_set_for_non_pk_integer() {
26574        let conn = make_sql_test_conn();
26575        super::SqlConnection::execute_batch(
26576            &conn,
26577            "CREATE TABLE plain_int (val INTEGER, name TEXT);",
26578        )
26579        .unwrap();
26580        let schema = sql_table_schema(&conn, "plain_int", None).unwrap().unwrap();
26581        let val = schema.column("val").unwrap();
26582        assert!(!val.autoincrement, "non-PK INTEGER is not autoincrement");
26583    }
26584
26585    #[cfg(feature = "sql-sqlite")]
26586    #[test]
26587    fn sql_table_schema_autoincrement_not_set_for_composite_pk_integer() {
26588        // Per fd90.42: SQLite's rowid-alias rule requires the column
26589        // to be the SOLE primary key. The tightened heuristic counts
26590        // PK columns first; composite PKs (multiple pk>0 rows) never
26591        // qualify even when the first column is INTEGER.
26592        let conn = make_sql_test_conn();
26593        super::SqlConnection::execute_batch(
26594            &conn,
26595            "CREATE TABLE composite_pk ( \
26596                year INTEGER NOT NULL, \
26597                month INTEGER NOT NULL, \
26598                code TEXT NOT NULL, \
26599                PRIMARY KEY (year, month, code) \
26600             );",
26601        )
26602        .unwrap();
26603        let schema = sql_table_schema(&conn, "composite_pk", None)
26604            .unwrap()
26605            .unwrap();
26606        let year = schema.column("year").unwrap();
26607        let month = schema.column("month").unwrap();
26608        let code = schema.column("code").unwrap();
26609        // Each part of the composite PK keeps its declaration-order
26610        // ordinal but NONE of them is autoincrement.
26611        assert_eq!(year.primary_key_ordinal, Some(0));
26612        assert_eq!(month.primary_key_ordinal, Some(1));
26613        assert_eq!(code.primary_key_ordinal, Some(2));
26614        assert!(
26615            !year.autoincrement,
26616            "composite PK first col must not be autoincrement"
26617        );
26618        assert!(!month.autoincrement);
26619        assert!(!code.autoincrement);
26620    }
26621
26622    #[cfg(feature = "sql-sqlite")]
26623    #[test]
26624    fn sql_table_schema_autoincrement_two_pass_count_distinguishes_single_vs_composite() {
26625        // Confirm the fix path: single-column INTEGER PRIMARY KEY -> true,
26626        // composite INTEGER+INTEGER PRIMARY KEY -> false on both.
26627        let conn = make_sql_test_conn();
26628        super::SqlConnection::execute_batch(
26629            &conn,
26630            "CREATE TABLE single_int_pk (id INTEGER PRIMARY KEY, label TEXT);",
26631        )
26632        .unwrap();
26633        super::SqlConnection::execute_batch(
26634            &conn,
26635            "CREATE TABLE composite_int_pk ( \
26636                a INTEGER NOT NULL, \
26637                b INTEGER NOT NULL, \
26638                PRIMARY KEY (a, b) \
26639             );",
26640        )
26641        .unwrap();
26642
26643        let single = sql_table_schema(&conn, "single_int_pk", None)
26644            .unwrap()
26645            .unwrap();
26646        assert!(single.column("id").unwrap().autoincrement);
26647
26648        let composite = sql_table_schema(&conn, "composite_int_pk", None)
26649            .unwrap()
26650            .unwrap();
26651        // Both columns have INTEGER type and pk>0 but neither qualifies.
26652        assert!(!composite.column("a").unwrap().autoincrement);
26653        assert!(!composite.column("b").unwrap().autoincrement);
26654    }
26655
26656    #[test]
26657    fn sql_table_schema_autoincrement_routes_to_backend_override() {
26658        // PG-like backend stub returns explicit autoincrement true for
26659        // a SERIAL/IDENTITY column.
26660        struct PgLikeAutoinc;
26661        impl super::SqlConnection for PgLikeAutoinc {
26662            fn query(&self, _q: &str, _p: &[Scalar]) -> Result<SqlQueryResult, IoError> {
26663                Ok(SqlQueryResult {
26664                    columns: vec![],
26665                    rows: vec![],
26666                })
26667            }
26668            fn execute_batch(&self, _sql: &str) -> Result<(), IoError> {
26669                Ok(())
26670            }
26671            fn table_exists(&self, _name: &str) -> Result<bool, IoError> {
26672                Ok(false)
26673            }
26674            fn insert_rows(&self, _sql: &str, _rows: &[Vec<Scalar>]) -> Result<(), IoError> {
26675                Ok(())
26676            }
26677            fn dtype_sql(&self, _dtype: DType) -> &'static str {
26678                "TEXT"
26679            }
26680            fn index_dtype_sql(&self, _index: &Index) -> &'static str {
26681                "TEXT"
26682            }
26683            fn supports_schemas(&self) -> bool {
26684                true
26685            }
26686            fn table_schema(
26687                &self,
26688                table: &str,
26689                _schema: Option<&str>,
26690            ) -> Result<Option<SqlTableSchema>, IoError> {
26691                if table == "users" {
26692                    Ok(Some(SqlTableSchema {
26693                        table_name: "users".to_owned(),
26694                        columns: vec![
26695                            SqlColumnSchema {
26696                                name: "id".to_owned(),
26697                                declared_type: Some("BIGSERIAL".to_owned()),
26698                                nullable: false,
26699                                default_value: None,
26700                                primary_key_ordinal: Some(0),
26701                                comment: None,
26702                                autoincrement: true,
26703                            },
26704                            SqlColumnSchema {
26705                                name: "email".to_owned(),
26706                                declared_type: Some("TEXT".to_owned()),
26707                                nullable: false,
26708                                default_value: None,
26709                                primary_key_ordinal: None,
26710                                comment: None,
26711                                autoincrement: false,
26712                            },
26713                        ],
26714                    }))
26715                } else {
26716                    Ok(None)
26717                }
26718            }
26719        }
26720        let conn = PgLikeAutoinc;
26721        let schema = sql_table_schema(&conn, "users", None).unwrap().unwrap();
26722        assert!(schema.column("id").unwrap().autoincrement);
26723        assert!(!schema.column("email").unwrap().autoincrement);
26724    }
26725
26726    // ── SqlInspector facade (br-szs9 / fd90.38) ──────────────────────────
26727
26728    #[cfg(feature = "sql-sqlite")]
26729    #[test]
26730    fn sql_inspector_tables_views_schemas() {
26731        let conn = make_sql_test_conn();
26732        super::SqlConnection::execute_batch(&conn, "CREATE TABLE t1 (x INTEGER);").unwrap();
26733        super::SqlConnection::execute_batch(&conn, "CREATE TABLE t2 (y TEXT);").unwrap();
26734        super::SqlConnection::execute_batch(&conn, "CREATE VIEW v1 AS SELECT x FROM t1;").unwrap();
26735
26736        let inspector = SqlInspector::new(&conn);
26737        assert_eq!(inspector.tables(None).unwrap(), vec!["t1", "t2"]);
26738        assert_eq!(inspector.views(None).unwrap(), vec!["v1"]);
26739        // SQLite has no meaningful schemas → empty vec.
26740        assert!(inspector.schemas().unwrap().is_empty());
26741    }
26742
26743    #[cfg(feature = "sql-sqlite")]
26744    #[test]
26745    fn sql_inspector_columns_pk_indexes_fks() {
26746        let conn = make_sql_test_conn();
26747        super::SqlConnection::execute_batch(
26748            &conn,
26749            "CREATE TABLE parent (pid INTEGER PRIMARY KEY);",
26750        )
26751        .unwrap();
26752        super::SqlConnection::execute_batch(
26753            &conn,
26754            "CREATE TABLE child ( \
26755                cid INTEGER PRIMARY KEY, \
26756                parent_id INTEGER, \
26757                tag TEXT, \
26758                FOREIGN KEY (parent_id) REFERENCES parent(pid) \
26759             );",
26760        )
26761        .unwrap();
26762        super::SqlConnection::execute_batch(&conn, "CREATE INDEX idx_child_tag ON child(tag);")
26763            .unwrap();
26764
26765        let inspector = SqlInspector::new(&conn);
26766
26767        // columns: 3 columns on 'child', cid + parent_id + tag.
26768        let schema = inspector.columns("child", None).unwrap().unwrap();
26769        let names: Vec<&str> = schema.columns.iter().map(|c| c.name.as_str()).collect();
26770        assert_eq!(names, vec!["cid", "parent_id", "tag"]);
26771
26772        // primary_key_columns: cid is the sole PK.
26773        let pk = inspector.primary_key_columns("child", None).unwrap();
26774        assert_eq!(pk, vec!["cid"]);
26775
26776        // indexes: only the explicit user index (PK auto-index filtered).
26777        let indexes = inspector.indexes("child", None).unwrap();
26778        assert_eq!(indexes.len(), 1);
26779        assert_eq!(indexes[0].name, "idx_child_tag");
26780
26781        // foreign_keys: child references parent.
26782        let fks = inspector.foreign_keys("child", None).unwrap();
26783        assert_eq!(fks.len(), 1);
26784        assert_eq!(fks[0].columns, vec!["parent_id"]);
26785        assert_eq!(fks[0].referenced_table, "parent");
26786        assert_eq!(fks[0].referenced_columns, vec!["pid"]);
26787    }
26788
26789    #[cfg(feature = "sql-sqlite")]
26790    #[test]
26791    fn sql_inspector_unique_constraints_and_table_exists() {
26792        let conn = make_sql_test_conn();
26793        super::SqlConnection::execute_batch(
26794            &conn,
26795            "CREATE TABLE users (id INTEGER PRIMARY KEY, email TEXT UNIQUE);",
26796        )
26797        .unwrap();
26798        let inspector = SqlInspector::new(&conn);
26799        let uqs = inspector.unique_constraints("users", None).unwrap();
26800        assert_eq!(uqs.len(), 1);
26801        assert_eq!(uqs[0].columns, vec!["email"]);
26802        assert!(inspector.table_exists("users", None).unwrap());
26803        assert!(!inspector.table_exists("not_there", None).unwrap());
26804    }
26805
26806    #[cfg(feature = "sql-sqlite")]
26807    #[test]
26808    fn sql_inspector_server_version_and_dialect() {
26809        let conn = make_sql_test_conn();
26810        let inspector = SqlInspector::new(&conn);
26811        let version = inspector.server_version().unwrap().unwrap();
26812        assert!(version.starts_with("3."));
26813        assert_eq!(inspector.dialect_name(), "sqlite");
26814        // SQLite has no documented identifier-length cap.
26815        assert_eq!(inspector.max_identifier_length(), None);
26816    }
26817
26818    #[cfg(feature = "sql-sqlite")]
26819    #[test]
26820    fn sql_inspector_table_comment_returns_none_on_sqlite() {
26821        let conn = make_sql_test_conn();
26822        super::SqlConnection::execute_batch(&conn, "CREATE TABLE t (x INTEGER);").unwrap();
26823        let inspector = SqlInspector::new(&conn);
26824        assert!(inspector.table_comment("t", None).unwrap().is_none());
26825    }
26826
26827    #[cfg(feature = "sql-sqlite")]
26828    #[test]
26829    fn sql_inspector_via_inspect_helper() {
26830        // The free-fn `inspect(&conn)` is the one-shot construction helper.
26831        // Per fd90.46: import lives inside the test so --no-default-features
26832        // builds don't see it as unused.
26833        use super::inspect;
26834        let conn = make_sql_test_conn();
26835        super::SqlConnection::execute_batch(&conn, "CREATE TABLE one (x INTEGER);").unwrap();
26836        let inspector = inspect(&conn);
26837        assert_eq!(inspector.tables(None).unwrap(), vec!["one"]);
26838    }
26839
26840    #[test]
26841    fn sql_inspector_routes_schema_arg_to_backend() {
26842        // Multi-schema backend: verifies SqlInspector forwards the schema
26843        // arg to every method that accepts one.
26844        struct PgLikeStub;
26845        impl super::SqlConnection for PgLikeStub {
26846            fn query(&self, _q: &str, _p: &[Scalar]) -> Result<SqlQueryResult, IoError> {
26847                Ok(SqlQueryResult {
26848                    columns: vec![],
26849                    rows: vec![],
26850                })
26851            }
26852            fn execute_batch(&self, _sql: &str) -> Result<(), IoError> {
26853                Ok(())
26854            }
26855            fn table_exists(&self, _name: &str) -> Result<bool, IoError> {
26856                Ok(false)
26857            }
26858            fn insert_rows(&self, _sql: &str, _rows: &[Vec<Scalar>]) -> Result<(), IoError> {
26859                Ok(())
26860            }
26861            fn dtype_sql(&self, _dtype: DType) -> &'static str {
26862                "TEXT"
26863            }
26864            fn index_dtype_sql(&self, _index: &Index) -> &'static str {
26865                "TEXT"
26866            }
26867            fn supports_schemas(&self) -> bool {
26868                true
26869            }
26870            fn list_tables(&self, schema: Option<&str>) -> Result<Vec<String>, IoError> {
26871                Ok(match schema {
26872                    Some("analytics") => vec!["events".to_owned()],
26873                    _ => vec![],
26874                })
26875            }
26876            fn list_views(&self, schema: Option<&str>) -> Result<Vec<String>, IoError> {
26877                Ok(match schema {
26878                    Some("analytics") => vec!["daily".to_owned()],
26879                    _ => vec![],
26880                })
26881            }
26882            fn list_schemas(&self) -> Result<Vec<String>, IoError> {
26883                Ok(vec!["public".to_owned(), "analytics".to_owned()])
26884            }
26885            fn dialect_name(&self) -> &'static str {
26886                "postgresql"
26887            }
26888            fn max_identifier_length(&self) -> Option<usize> {
26889                Some(63)
26890            }
26891        }
26892        let conn = PgLikeStub;
26893        let inspector = SqlInspector::new(&conn);
26894
26895        assert_eq!(inspector.tables(Some("analytics")).unwrap(), vec!["events"]);
26896        assert_eq!(inspector.views(Some("analytics")).unwrap(), vec!["daily"]);
26897        assert!(inspector.tables(Some("audit")).unwrap().is_empty());
26898        assert_eq!(inspector.schemas().unwrap(), vec!["public", "analytics"]);
26899        assert_eq!(inspector.dialect_name(), "postgresql");
26900        assert_eq!(inspector.max_identifier_length(), Some(63));
26901    }
26902
26903    // ── SqlInspector::has_column / column (br-ppry / fd90.39) ─────────────
26904
26905    #[cfg(feature = "sql-sqlite")]
26906    #[test]
26907    fn sql_inspector_has_column_returns_true_for_present_column() {
26908        let conn = make_sql_test_conn();
26909        super::SqlConnection::execute_batch(
26910            &conn,
26911            "CREATE TABLE has_col_tbl (id INTEGER, name TEXT);",
26912        )
26913        .unwrap();
26914        let inspector = SqlInspector::new(&conn);
26915        assert!(inspector.has_column("has_col_tbl", "id", None).unwrap());
26916        assert!(inspector.has_column("has_col_tbl", "name", None).unwrap());
26917    }
26918
26919    #[cfg(feature = "sql-sqlite")]
26920    #[test]
26921    fn sql_inspector_has_column_returns_false_for_missing_column() {
26922        let conn = make_sql_test_conn();
26923        super::SqlConnection::execute_batch(&conn, "CREATE TABLE only_id (id INTEGER);").unwrap();
26924        let inspector = SqlInspector::new(&conn);
26925        // Table exists but no such column.
26926        assert!(!inspector.has_column("only_id", "name", None).unwrap());
26927    }
26928
26929    #[cfg(feature = "sql-sqlite")]
26930    #[test]
26931    fn sql_inspector_has_column_returns_false_for_missing_table() {
26932        let conn = make_sql_test_conn();
26933        let inspector = SqlInspector::new(&conn);
26934        // Table doesn't exist → has_column propagates Ok(false), not error.
26935        assert!(
26936            !inspector
26937                .has_column("no_such_tbl", "any_col", None)
26938                .unwrap()
26939        );
26940    }
26941
26942    #[cfg(feature = "sql-sqlite")]
26943    #[test]
26944    fn sql_inspector_column_returns_full_metadata_for_present_column() {
26945        let conn = make_sql_test_conn();
26946        super::SqlConnection::execute_batch(
26947            &conn,
26948            "CREATE TABLE detailed (id INTEGER PRIMARY KEY, status TEXT DEFAULT 'active');",
26949        )
26950        .unwrap();
26951        let inspector = SqlInspector::new(&conn);
26952        let id = inspector.column("detailed", "id", None).unwrap().unwrap();
26953        assert_eq!(id.name, "id");
26954        assert_eq!(id.declared_type.as_deref(), Some("INTEGER"));
26955        assert_eq!(id.primary_key_ordinal, Some(0));
26956        // INTEGER PRIMARY KEY → SQLite autoincrement (rowid alias).
26957        assert!(id.autoincrement);
26958
26959        let status = inspector
26960            .column("detailed", "status", None)
26961            .unwrap()
26962            .unwrap();
26963        assert_eq!(status.declared_type.as_deref(), Some("TEXT"));
26964        assert!(status.nullable);
26965        assert_eq!(status.default_value.as_deref(), Some("'active'"));
26966        assert!(!status.autoincrement);
26967    }
26968
26969    #[cfg(feature = "sql-sqlite")]
26970    #[test]
26971    fn sql_inspector_column_returns_none_for_missing_column_or_table() {
26972        let conn = make_sql_test_conn();
26973        super::SqlConnection::execute_batch(&conn, "CREATE TABLE only_x (x INTEGER);").unwrap();
26974        let inspector = SqlInspector::new(&conn);
26975        // Existing table, missing column → None.
26976        assert!(
26977            inspector
26978                .column("only_x", "missing", None)
26979                .unwrap()
26980                .is_none()
26981        );
26982        // Missing table → None.
26983        assert!(inspector.column("no_such", "any", None).unwrap().is_none());
26984    }
26985
26986    #[test]
26987    fn sql_inspector_has_column_routes_schema_arg_to_backend() {
26988        struct PgLikeColumns;
26989        impl super::SqlConnection for PgLikeColumns {
26990            fn query(&self, _q: &str, _p: &[Scalar]) -> Result<SqlQueryResult, IoError> {
26991                Ok(SqlQueryResult {
26992                    columns: vec![],
26993                    rows: vec![],
26994                })
26995            }
26996            fn execute_batch(&self, _sql: &str) -> Result<(), IoError> {
26997                Ok(())
26998            }
26999            fn table_exists(&self, _name: &str) -> Result<bool, IoError> {
27000                Ok(false)
27001            }
27002            fn insert_rows(&self, _sql: &str, _rows: &[Vec<Scalar>]) -> Result<(), IoError> {
27003                Ok(())
27004            }
27005            fn dtype_sql(&self, _dtype: DType) -> &'static str {
27006                "TEXT"
27007            }
27008            fn index_dtype_sql(&self, _index: &Index) -> &'static str {
27009                "TEXT"
27010            }
27011            fn supports_schemas(&self) -> bool {
27012                true
27013            }
27014            fn table_schema(
27015                &self,
27016                table: &str,
27017                schema: Option<&str>,
27018            ) -> Result<Option<SqlTableSchema>, IoError> {
27019                if table == "users" && schema == Some("public") {
27020                    Ok(Some(SqlTableSchema {
27021                        table_name: "users".to_owned(),
27022                        columns: vec![SqlColumnSchema {
27023                            name: "id".to_owned(),
27024                            declared_type: Some("BIGINT".to_owned()),
27025                            nullable: false,
27026                            default_value: None,
27027                            primary_key_ordinal: Some(0),
27028                            comment: None,
27029                            autoincrement: true,
27030                        }],
27031                    }))
27032                } else {
27033                    Ok(None)
27034                }
27035            }
27036        }
27037        let conn = PgLikeColumns;
27038        let inspector = SqlInspector::new(&conn);
27039        assert!(inspector.has_column("users", "id", Some("public")).unwrap());
27040        assert!(!inspector.has_column("users", "id", Some("audit")).unwrap());
27041        assert!(
27042            !inspector
27043                .has_column("users", "missing", Some("public"))
27044                .unwrap()
27045        );
27046
27047        let id_col = inspector
27048            .column("users", "id", Some("public"))
27049            .unwrap()
27050            .unwrap();
27051        assert_eq!(id_col.declared_type.as_deref(), Some("BIGINT"));
27052        assert!(id_col.autoincrement);
27053        assert!(
27054            inspector
27055                .column("users", "id", Some("audit"))
27056                .unwrap()
27057                .is_none()
27058        );
27059    }
27060
27061    // ── SqlInspector::reflect_table (br-76mw / fd90.40) ──────────────────
27062
27063    #[cfg(feature = "sql-sqlite")]
27064    #[test]
27065    fn sql_inspector_reflect_table_unknown_returns_none() {
27066        let conn = make_sql_test_conn();
27067        let inspector = SqlInspector::new(&conn);
27068        let result = inspector.reflect_table("no_such", None).unwrap();
27069        assert!(result.is_none());
27070    }
27071
27072    #[cfg(feature = "sql-sqlite")]
27073    #[test]
27074    fn sql_inspector_reflect_table_bundles_all_metadata() {
27075        let conn = make_sql_test_conn();
27076        super::SqlConnection::execute_batch(
27077            &conn,
27078            "CREATE TABLE parent (pid INTEGER PRIMARY KEY, code TEXT);",
27079        )
27080        .unwrap();
27081        super::SqlConnection::execute_batch(
27082            &conn,
27083            "CREATE TABLE bundled ( \
27084                id INTEGER PRIMARY KEY, \
27085                parent_id INTEGER, \
27086                slug TEXT, \
27087                email TEXT UNIQUE, \
27088                FOREIGN KEY (parent_id) REFERENCES parent(pid) \
27089             );",
27090        )
27091        .unwrap();
27092        super::SqlConnection::execute_batch(
27093            &conn,
27094            "CREATE INDEX idx_bundled_slug ON bundled(slug);",
27095        )
27096        .unwrap();
27097
27098        let inspector = SqlInspector::new(&conn);
27099        let bundle = inspector
27100            .reflect_table("bundled", None)
27101            .unwrap()
27102            .expect("table exists");
27103
27104        assert_eq!(bundle.table_name, "bundled");
27105
27106        // Columns: id, parent_id, slug, email.
27107        let names: Vec<&str> = bundle.columns.iter().map(|c| c.name.as_str()).collect();
27108        assert_eq!(names, vec!["id", "parent_id", "slug", "email"]);
27109        // INTEGER PRIMARY KEY id is autoincrement.
27110        let id_col = bundle
27111            .columns
27112            .iter()
27113            .find(|c| c.name == "id")
27114            .expect("id col");
27115        assert!(id_col.autoincrement);
27116
27117        // Primary key.
27118        assert_eq!(bundle.primary_key_columns, vec!["id"]);
27119
27120        // Indexes (only the user CREATE INDEX; the UNIQUE constraint
27121        // index goes via unique_constraints, the PK auto-index is
27122        // hidden).
27123        assert_eq!(bundle.indexes.len(), 1);
27124        assert_eq!(bundle.indexes[0].name, "idx_bundled_slug");
27125
27126        // Unique constraints (the inline UNIQUE on email).
27127        assert_eq!(bundle.unique_constraints.len(), 1);
27128        assert_eq!(bundle.unique_constraints[0].columns, vec!["email"]);
27129
27130        // Foreign keys (parent_id -> parent.pid).
27131        assert_eq!(bundle.foreign_keys.len(), 1);
27132        assert_eq!(bundle.foreign_keys[0].columns, vec!["parent_id"]);
27133        assert_eq!(bundle.foreign_keys[0].referenced_table, "parent");
27134
27135        // SQLite has no native column/table comment; comment is None.
27136        assert!(bundle.comment.is_none());
27137    }
27138
27139    #[test]
27140    fn sql_inspector_reflect_table_routes_to_backend_override() {
27141        // Multi-schema PG-like backend that returns explicit comment +
27142        // populated metadata. Verifies all five sub-calls flow through
27143        // and end up in the bundled struct.
27144        struct PgLikeBundle;
27145        impl super::SqlConnection for PgLikeBundle {
27146            fn query(&self, _q: &str, _p: &[Scalar]) -> Result<SqlQueryResult, IoError> {
27147                Ok(SqlQueryResult {
27148                    columns: vec![],
27149                    rows: vec![],
27150                })
27151            }
27152            fn execute_batch(&self, _sql: &str) -> Result<(), IoError> {
27153                Ok(())
27154            }
27155            fn table_exists(&self, _name: &str) -> Result<bool, IoError> {
27156                Ok(false)
27157            }
27158            fn insert_rows(&self, _sql: &str, _rows: &[Vec<Scalar>]) -> Result<(), IoError> {
27159                Ok(())
27160            }
27161            fn dtype_sql(&self, _dtype: DType) -> &'static str {
27162                "TEXT"
27163            }
27164            fn index_dtype_sql(&self, _index: &Index) -> &'static str {
27165                "TEXT"
27166            }
27167            fn supports_schemas(&self) -> bool {
27168                true
27169            }
27170            fn table_schema(
27171                &self,
27172                table: &str,
27173                schema: Option<&str>,
27174            ) -> Result<Option<SqlTableSchema>, IoError> {
27175                if table == "users" && schema == Some("public") {
27176                    Ok(Some(SqlTableSchema {
27177                        table_name: "users".to_owned(),
27178                        columns: vec![SqlColumnSchema {
27179                            name: "id".to_owned(),
27180                            declared_type: Some("BIGINT".to_owned()),
27181                            nullable: false,
27182                            default_value: None,
27183                            primary_key_ordinal: Some(0),
27184                            comment: None,
27185                            autoincrement: true,
27186                        }],
27187                    }))
27188                } else {
27189                    Ok(None)
27190                }
27191            }
27192            fn primary_key_columns(
27193                &self,
27194                table: &str,
27195                schema: Option<&str>,
27196            ) -> Result<Vec<String>, IoError> {
27197                if table == "users" && schema == Some("public") {
27198                    Ok(vec!["id".to_owned()])
27199                } else {
27200                    Ok(vec![])
27201                }
27202            }
27203            fn list_indexes(
27204                &self,
27205                _table: &str,
27206                _schema: Option<&str>,
27207            ) -> Result<Vec<SqlIndexSchema>, IoError> {
27208                Ok(vec![SqlIndexSchema {
27209                    name: "users_status_idx".to_owned(),
27210                    columns: vec!["status".to_owned()],
27211                    unique: false,
27212                }])
27213            }
27214            fn list_foreign_keys(
27215                &self,
27216                _table: &str,
27217                _schema: Option<&str>,
27218            ) -> Result<Vec<SqlForeignKeySchema>, IoError> {
27219                Ok(vec![])
27220            }
27221            fn list_unique_constraints(
27222                &self,
27223                _table: &str,
27224                _schema: Option<&str>,
27225            ) -> Result<Vec<SqlUniqueConstraintSchema>, IoError> {
27226                Ok(vec![SqlUniqueConstraintSchema {
27227                    name: "users_email_key".to_owned(),
27228                    columns: vec!["email".to_owned()],
27229                }])
27230            }
27231            fn table_comment(
27232                &self,
27233                _table: &str,
27234                _schema: Option<&str>,
27235            ) -> Result<Option<String>, IoError> {
27236                Ok(Some("Customer accounts".to_owned()))
27237            }
27238        }
27239        let conn = PgLikeBundle;
27240        let inspector = SqlInspector::new(&conn);
27241        let bundle = inspector
27242            .reflect_table("users", Some("public"))
27243            .unwrap()
27244            .expect("present");
27245        assert_eq!(bundle.table_name, "users");
27246        assert_eq!(bundle.columns.len(), 1);
27247        assert_eq!(bundle.primary_key_columns, vec!["id"]);
27248        assert_eq!(bundle.indexes.len(), 1);
27249        assert_eq!(bundle.indexes[0].name, "users_status_idx");
27250        assert_eq!(bundle.unique_constraints.len(), 1);
27251        assert_eq!(bundle.foreign_keys.len(), 0);
27252        assert_eq!(bundle.comment.as_deref(), Some("Customer accounts"));
27253
27254        // Wrong schema -> None (table_schema returns None).
27255        assert!(
27256            inspector
27257                .reflect_table("users", Some("audit"))
27258                .unwrap()
27259                .is_none()
27260        );
27261    }
27262
27263    // Use SqlReflectedTable in a smoke test so the struct's named
27264    // fields are exercised at the use-site too.
27265    #[test]
27266    fn sql_reflected_table_bundle_smoke_test() {
27267        let bundle = SqlReflectedTable {
27268            table_name: "t".to_owned(),
27269            columns: vec![],
27270            primary_key_columns: vec![],
27271            indexes: vec![],
27272            foreign_keys: vec![],
27273            unique_constraints: vec![],
27274            comment: None,
27275        };
27276        assert_eq!(bundle.table_name, "t");
27277        assert!(bundle.columns.is_empty());
27278    }
27279
27280    // ── SqlReflectedTable accessor methods (br-63ac / fd90.51) ────────────
27281
27282    #[test]
27283    fn sql_reflected_table_accessors_find_named_entries() {
27284        let bundle = SqlReflectedTable {
27285            table_name: "orders".to_owned(),
27286            columns: vec![
27287                SqlColumnSchema {
27288                    name: "id".to_owned(),
27289                    declared_type: Some("INTEGER".to_owned()),
27290                    nullable: false,
27291                    default_value: None,
27292                    primary_key_ordinal: Some(0),
27293                    comment: None,
27294                    autoincrement: true,
27295                },
27296                SqlColumnSchema {
27297                    name: "user_id".to_owned(),
27298                    declared_type: Some("INTEGER".to_owned()),
27299                    nullable: false,
27300                    default_value: None,
27301                    primary_key_ordinal: None,
27302                    comment: None,
27303                    autoincrement: false,
27304                },
27305            ],
27306            primary_key_columns: vec!["id".to_owned()],
27307            indexes: vec![SqlIndexSchema {
27308                name: "idx_orders_user".to_owned(),
27309                columns: vec!["user_id".to_owned()],
27310                unique: false,
27311            }],
27312            foreign_keys: vec![SqlForeignKeySchema {
27313                constraint_name: None,
27314                columns: vec!["user_id".to_owned()],
27315                referenced_table: "users".to_owned(),
27316                referenced_columns: vec!["id".to_owned()],
27317            }],
27318            unique_constraints: vec![SqlUniqueConstraintSchema {
27319                name: "uq_orders_id".to_owned(),
27320                columns: vec!["id".to_owned()],
27321            }],
27322            comment: Some("Customer orders".to_owned()),
27323        };
27324
27325        // column(name): present + missing.
27326        let id = bundle.column("id").expect("id column");
27327        assert_eq!(id.declared_type.as_deref(), Some("INTEGER"));
27328        assert!(id.autoincrement);
27329        assert!(bundle.column("missing").is_none());
27330
27331        // index(name): present + missing.
27332        let idx = bundle.index("idx_orders_user").expect("idx");
27333        assert_eq!(idx.columns, vec!["user_id"]);
27334        assert!(bundle.index("idx_does_not_exist").is_none());
27335
27336        // unique_constraint(name).
27337        let uq = bundle.unique_constraint("uq_orders_id").expect("uq");
27338        assert_eq!(uq.columns, vec!["id"]);
27339        assert!(bundle.unique_constraint("uq_missing").is_none());
27340
27341        // foreign_keys_for_column(col): matches the FK touching user_id.
27342        let fks = bundle.foreign_keys_for_column("user_id");
27343        assert_eq!(fks.len(), 1);
27344        assert_eq!(fks[0].referenced_table, "users");
27345        // Column not part of any FK -> empty.
27346        assert!(bundle.foreign_keys_for_column("id").is_empty());
27347        assert!(bundle.foreign_keys_for_column("nonexistent").is_empty());
27348    }
27349
27350    #[test]
27351    fn sql_reflected_table_foreign_keys_for_column_handles_composite_fks() {
27352        let bundle = SqlReflectedTable {
27353            table_name: "rolling_fact".to_owned(),
27354            columns: vec![],
27355            primary_key_columns: vec![],
27356            indexes: vec![],
27357            foreign_keys: vec![SqlForeignKeySchema {
27358                constraint_name: None,
27359                columns: vec!["fyear".to_owned(), "fmonth".to_owned()],
27360                referenced_table: "rolling".to_owned(),
27361                referenced_columns: vec!["year".to_owned(), "month".to_owned()],
27362            }],
27363            unique_constraints: vec![],
27364            comment: None,
27365        };
27366        // Composite FK touches both fyear and fmonth — both should
27367        // surface the same FK.
27368        assert_eq!(bundle.foreign_keys_for_column("fyear").len(), 1);
27369        assert_eq!(bundle.foreign_keys_for_column("fmonth").len(), 1);
27370        assert!(bundle.foreign_keys_for_column("year").is_empty()); // referenced col, not from col
27371    }
27372
27373    #[test]
27374    fn sql_reflected_table_foreign_keys_for_column_returns_multiple_when_relevant() {
27375        // Rare but valid: one column participates in two FKs (e.g.
27376        // same column referenced by separate FKs to two parents).
27377        let bundle = SqlReflectedTable {
27378            table_name: "audit".to_owned(),
27379            columns: vec![],
27380            primary_key_columns: vec![],
27381            indexes: vec![],
27382            foreign_keys: vec![
27383                SqlForeignKeySchema {
27384                    constraint_name: Some("fk_audit_a".to_owned()),
27385                    columns: vec!["entity_id".to_owned()],
27386                    referenced_table: "users".to_owned(),
27387                    referenced_columns: vec!["id".to_owned()],
27388                },
27389                SqlForeignKeySchema {
27390                    constraint_name: Some("fk_audit_b".to_owned()),
27391                    columns: vec!["entity_id".to_owned()],
27392                    referenced_table: "products".to_owned(),
27393                    referenced_columns: vec!["id".to_owned()],
27394                },
27395            ],
27396            unique_constraints: vec![],
27397            comment: None,
27398        };
27399        let fks = bundle.foreign_keys_for_column("entity_id");
27400        assert_eq!(fks.len(), 2);
27401        // Order preserved.
27402        assert_eq!(fks[0].constraint_name.as_deref(), Some("fk_audit_a"));
27403        assert_eq!(fks[1].constraint_name.as_deref(), Some("fk_audit_b"));
27404    }
27405
27406    // ── indexes_for_column / unique_constraints_for_column (br-37uy / fd90.52) ─
27407
27408    #[test]
27409    fn sql_reflected_table_indexes_for_column_matches_any_position() {
27410        let bundle = SqlReflectedTable {
27411            table_name: "rolling".to_owned(),
27412            columns: vec![],
27413            primary_key_columns: vec![],
27414            indexes: vec![
27415                SqlIndexSchema {
27416                    name: "idx_rolling_year".to_owned(),
27417                    columns: vec!["year".to_owned()],
27418                    unique: false,
27419                },
27420                SqlIndexSchema {
27421                    name: "idx_rolling_y_m_c".to_owned(),
27422                    columns: vec!["year".to_owned(), "month".to_owned(), "code".to_owned()],
27423                    unique: false,
27424                },
27425            ],
27426            foreign_keys: vec![],
27427            unique_constraints: vec![],
27428            comment: None,
27429        };
27430
27431        // 'year' appears in both indexes (first in idx_year, first in
27432        // composite). Returns both.
27433        let year_idxs = bundle.indexes_for_column("year");
27434        assert_eq!(year_idxs.len(), 2);
27435
27436        // 'month' only appears in the composite index, in middle position.
27437        let month_idxs = bundle.indexes_for_column("month");
27438        assert_eq!(month_idxs.len(), 1);
27439        assert_eq!(month_idxs[0].name, "idx_rolling_y_m_c");
27440
27441        // 'code' appears only in the composite, last position.
27442        let code_idxs = bundle.indexes_for_column("code");
27443        assert_eq!(code_idxs.len(), 1);
27444
27445        // Column not in any index.
27446        assert!(bundle.indexes_for_column("nonexistent").is_empty());
27447    }
27448
27449    #[test]
27450    fn sql_reflected_table_unique_constraints_for_column_matches_any_position() {
27451        let bundle = SqlReflectedTable {
27452            table_name: "events".to_owned(),
27453            columns: vec![],
27454            primary_key_columns: vec![],
27455            indexes: vec![],
27456            foreign_keys: vec![],
27457            unique_constraints: vec![
27458                SqlUniqueConstraintSchema {
27459                    name: "uq_events_email".to_owned(),
27460                    columns: vec!["email".to_owned()],
27461                },
27462                SqlUniqueConstraintSchema {
27463                    name: "uq_events_user_event_ts".to_owned(),
27464                    columns: vec!["user_id".to_owned(), "event_id".to_owned(), "ts".to_owned()],
27465                },
27466            ],
27467            comment: None,
27468        };
27469
27470        let email_uqs = bundle.unique_constraints_for_column("email");
27471        assert_eq!(email_uqs.len(), 1);
27472        assert_eq!(email_uqs[0].name, "uq_events_email");
27473
27474        // 'event_id' middle position in composite.
27475        let event_uqs = bundle.unique_constraints_for_column("event_id");
27476        assert_eq!(event_uqs.len(), 1);
27477        assert_eq!(event_uqs[0].columns, vec!["user_id", "event_id", "ts"]);
27478
27479        // 'ts' last position in composite.
27480        let ts_uqs = bundle.unique_constraints_for_column("ts");
27481        assert_eq!(ts_uqs.len(), 1);
27482
27483        assert!(
27484            bundle
27485                .unique_constraints_for_column("nonexistent")
27486                .is_empty()
27487        );
27488    }
27489
27490    #[test]
27491    fn sql_reflected_table_for_column_accessors_return_multiple() {
27492        // A column can appear in multiple indexes / unique constraints.
27493        let bundle = SqlReflectedTable {
27494            table_name: "wide".to_owned(),
27495            columns: vec![],
27496            primary_key_columns: vec![],
27497            indexes: vec![
27498                SqlIndexSchema {
27499                    name: "idx_a".to_owned(),
27500                    columns: vec!["x".to_owned()],
27501                    unique: false,
27502                },
27503                SqlIndexSchema {
27504                    name: "idx_b".to_owned(),
27505                    columns: vec!["x".to_owned(), "y".to_owned()],
27506                    unique: true,
27507                },
27508            ],
27509            foreign_keys: vec![],
27510            unique_constraints: vec![
27511                SqlUniqueConstraintSchema {
27512                    name: "uq_a".to_owned(),
27513                    columns: vec!["x".to_owned()],
27514                },
27515                SqlUniqueConstraintSchema {
27516                    name: "uq_b".to_owned(),
27517                    columns: vec!["x".to_owned(), "z".to_owned()],
27518                },
27519            ],
27520            comment: None,
27521        };
27522
27523        let idx_for_x = bundle.indexes_for_column("x");
27524        assert_eq!(idx_for_x.len(), 2);
27525        assert_eq!(idx_for_x[0].name, "idx_a");
27526        assert_eq!(idx_for_x[1].name, "idx_b");
27527
27528        let uq_for_x = bundle.unique_constraints_for_column("x");
27529        assert_eq!(uq_for_x.len(), 2);
27530        assert_eq!(uq_for_x[0].name, "uq_a");
27531        assert_eq!(uq_for_x[1].name, "uq_b");
27532    }
27533
27534    // ── SqlInspector::reflect_all_tables (br-jmmo / fd90.53) ─────────────
27535
27536    #[cfg(feature = "sql-sqlite")]
27537    #[test]
27538    fn sql_inspector_reflect_all_tables_empty_db() {
27539        let conn = make_sql_test_conn();
27540        let inspector = SqlInspector::new(&conn);
27541        let bundles = inspector.reflect_all_tables(None).unwrap();
27542        assert!(bundles.is_empty());
27543    }
27544
27545    #[cfg(feature = "sql-sqlite")]
27546    #[test]
27547    fn sql_inspector_reflect_all_tables_returns_one_bundle_per_table() {
27548        let conn = make_sql_test_conn();
27549        super::SqlConnection::execute_batch(
27550            &conn,
27551            "CREATE TABLE alpha (id INTEGER PRIMARY KEY, name TEXT);",
27552        )
27553        .unwrap();
27554        super::SqlConnection::execute_batch(&conn, "CREATE TABLE beta (uid INTEGER, label TEXT);")
27555            .unwrap();
27556        let inspector = SqlInspector::new(&conn);
27557        let bundles = inspector.reflect_all_tables(None).unwrap();
27558        assert_eq!(bundles.len(), 2);
27559        // Ordered alphabetically by list_tables.
27560        assert_eq!(bundles[0].table_name, "alpha");
27561        assert_eq!(bundles[1].table_name, "beta");
27562        // Each bundle has full metadata.
27563        assert_eq!(
27564            bundles[0]
27565                .columns
27566                .iter()
27567                .map(|c| c.name.as_str())
27568                .collect::<Vec<_>>(),
27569            vec!["id", "name"]
27570        );
27571        assert_eq!(bundles[0].primary_key_columns, vec!["id"]);
27572        assert_eq!(bundles[1].columns.len(), 2);
27573        assert!(bundles[1].primary_key_columns.is_empty());
27574    }
27575
27576    #[test]
27577    fn sql_inspector_reflect_all_tables_skips_disappearing_tables() {
27578        // Race-condition stub: list_tables returns ["a", "b"] but
27579        // table_schema returns None for "b" (simulating a concurrent
27580        // DROP between list and reflect). reflect_all_tables must
27581        // skip "b" without erroring.
27582        struct DisappearingTable;
27583        impl super::SqlConnection for DisappearingTable {
27584            fn query(&self, _q: &str, _p: &[Scalar]) -> Result<SqlQueryResult, IoError> {
27585                Ok(SqlQueryResult {
27586                    columns: vec![],
27587                    rows: vec![],
27588                })
27589            }
27590            fn execute_batch(&self, _sql: &str) -> Result<(), IoError> {
27591                Ok(())
27592            }
27593            fn table_exists(&self, _name: &str) -> Result<bool, IoError> {
27594                Ok(false)
27595            }
27596            fn insert_rows(&self, _sql: &str, _rows: &[Vec<Scalar>]) -> Result<(), IoError> {
27597                Ok(())
27598            }
27599            fn dtype_sql(&self, _dtype: DType) -> &'static str {
27600                "TEXT"
27601            }
27602            fn index_dtype_sql(&self, _index: &Index) -> &'static str {
27603                "TEXT"
27604            }
27605            fn list_tables(&self, _schema: Option<&str>) -> Result<Vec<String>, IoError> {
27606                Ok(vec!["a".to_owned(), "b".to_owned()])
27607            }
27608            fn table_schema(
27609                &self,
27610                table: &str,
27611                _schema: Option<&str>,
27612            ) -> Result<Option<SqlTableSchema>, IoError> {
27613                if table == "a" {
27614                    Ok(Some(SqlTableSchema {
27615                        table_name: "a".to_owned(),
27616                        columns: vec![SqlColumnSchema {
27617                            name: "x".to_owned(),
27618                            declared_type: Some("INTEGER".to_owned()),
27619                            nullable: true,
27620                            default_value: None,
27621                            primary_key_ordinal: None,
27622                            comment: None,
27623                            autoincrement: false,
27624                        }],
27625                    }))
27626                } else {
27627                    // b "disappeared" between list and reflect.
27628                    Ok(None)
27629                }
27630            }
27631        }
27632        let conn = DisappearingTable;
27633        let inspector = SqlInspector::new(&conn);
27634        let bundles = inspector.reflect_all_tables(None).unwrap();
27635        assert_eq!(bundles.len(), 1);
27636        assert_eq!(bundles[0].table_name, "a");
27637    }
27638
27639    #[test]
27640    fn sql_inspector_reflect_all_tables_routes_schema_arg() {
27641        // Multi-schema stub: list_tables returns different sets per
27642        // schema; reflect_all_tables must propagate the schema arg.
27643        struct MultiSchemaReflect;
27644        impl super::SqlConnection for MultiSchemaReflect {
27645            fn query(&self, _q: &str, _p: &[Scalar]) -> Result<SqlQueryResult, IoError> {
27646                Ok(SqlQueryResult {
27647                    columns: vec![],
27648                    rows: vec![],
27649                })
27650            }
27651            fn execute_batch(&self, _sql: &str) -> Result<(), IoError> {
27652                Ok(())
27653            }
27654            fn table_exists(&self, _name: &str) -> Result<bool, IoError> {
27655                Ok(false)
27656            }
27657            fn insert_rows(&self, _sql: &str, _rows: &[Vec<Scalar>]) -> Result<(), IoError> {
27658                Ok(())
27659            }
27660            fn dtype_sql(&self, _dtype: DType) -> &'static str {
27661                "TEXT"
27662            }
27663            fn index_dtype_sql(&self, _index: &Index) -> &'static str {
27664                "TEXT"
27665            }
27666            fn supports_schemas(&self) -> bool {
27667                true
27668            }
27669            fn list_tables(&self, schema: Option<&str>) -> Result<Vec<String>, IoError> {
27670                Ok(match schema {
27671                    Some("analytics") => vec!["events".to_owned()],
27672                    _ => vec![],
27673                })
27674            }
27675            fn table_schema(
27676                &self,
27677                table: &str,
27678                schema: Option<&str>,
27679            ) -> Result<Option<SqlTableSchema>, IoError> {
27680                if table == "events" && schema == Some("analytics") {
27681                    Ok(Some(SqlTableSchema {
27682                        table_name: "events".to_owned(),
27683                        columns: vec![SqlColumnSchema {
27684                            name: "ts".to_owned(),
27685                            declared_type: Some("TIMESTAMPTZ".to_owned()),
27686                            nullable: false,
27687                            default_value: None,
27688                            primary_key_ordinal: None,
27689                            comment: None,
27690                            autoincrement: false,
27691                        }],
27692                    }))
27693                } else {
27694                    Ok(None)
27695                }
27696            }
27697        }
27698        let conn = MultiSchemaReflect;
27699        let inspector = SqlInspector::new(&conn);
27700        let bundles = inspector.reflect_all_tables(Some("analytics")).unwrap();
27701        assert_eq!(bundles.len(), 1);
27702        assert_eq!(bundles[0].table_name, "events");
27703        assert_eq!(
27704            bundles[0].columns[0].declared_type.as_deref(),
27705            Some("TIMESTAMPTZ")
27706        );
27707        // Wrong schema -> empty.
27708        assert!(
27709            inspector
27710                .reflect_all_tables(Some("audit"))
27711                .unwrap()
27712                .is_empty()
27713        );
27714    }
27715
27716    // ── SqlInspector::reflect_all_views (br-zuqt / fd90.54) ──────────────
27717
27718    #[cfg(feature = "sql-sqlite")]
27719    #[test]
27720    fn sql_inspector_reflect_all_views_empty_db() {
27721        let conn = make_sql_test_conn();
27722        let inspector = SqlInspector::new(&conn);
27723        let bundles = inspector.reflect_all_views(None).unwrap();
27724        assert!(bundles.is_empty());
27725    }
27726
27727    #[cfg(feature = "sql-sqlite")]
27728    #[test]
27729    fn sql_inspector_reflect_all_views_returns_one_bundle_per_view() {
27730        let conn = make_sql_test_conn();
27731        super::SqlConnection::execute_batch(&conn, "CREATE TABLE base (id INTEGER, label TEXT);")
27732            .unwrap();
27733        super::SqlConnection::execute_batch(
27734            &conn,
27735            "CREATE VIEW alpha_view AS SELECT id FROM base;",
27736        )
27737        .unwrap();
27738        super::SqlConnection::execute_batch(
27739            &conn,
27740            "CREATE VIEW zebra_view AS SELECT label FROM base;",
27741        )
27742        .unwrap();
27743        let inspector = SqlInspector::new(&conn);
27744        let bundles = inspector.reflect_all_views(None).unwrap();
27745        // Tables ARE NOT included — only views.
27746        assert_eq!(bundles.len(), 2);
27747        assert_eq!(bundles[0].table_name, "alpha_view");
27748        assert_eq!(bundles[1].table_name, "zebra_view");
27749        // Each view's columns are surfaced via PRAGMA table_info.
27750        assert_eq!(
27751            bundles[0]
27752                .columns
27753                .iter()
27754                .map(|c| c.name.as_str())
27755                .collect::<Vec<_>>(),
27756            vec!["id"]
27757        );
27758        assert_eq!(
27759            bundles[1]
27760                .columns
27761                .iter()
27762                .map(|c| c.name.as_str())
27763                .collect::<Vec<_>>(),
27764            vec!["label"]
27765        );
27766        // Views don't carry constraints — PK/FK/UC/index lists are empty.
27767        for bundle in &bundles {
27768            assert!(bundle.primary_key_columns.is_empty());
27769            assert!(bundle.indexes.is_empty());
27770            assert!(bundle.foreign_keys.is_empty());
27771            assert!(bundle.unique_constraints.is_empty());
27772        }
27773    }
27774
27775    #[test]
27776    fn sql_inspector_reflect_all_views_routes_schema_arg() {
27777        // Multi-schema stub: list_views returns different sets per schema.
27778        struct MultiSchemaViewReflect;
27779        impl super::SqlConnection for MultiSchemaViewReflect {
27780            fn query(&self, _q: &str, _p: &[Scalar]) -> Result<SqlQueryResult, IoError> {
27781                Ok(SqlQueryResult {
27782                    columns: vec![],
27783                    rows: vec![],
27784                })
27785            }
27786            fn execute_batch(&self, _sql: &str) -> Result<(), IoError> {
27787                Ok(())
27788            }
27789            fn table_exists(&self, _name: &str) -> Result<bool, IoError> {
27790                Ok(false)
27791            }
27792            fn insert_rows(&self, _sql: &str, _rows: &[Vec<Scalar>]) -> Result<(), IoError> {
27793                Ok(())
27794            }
27795            fn dtype_sql(&self, _dtype: DType) -> &'static str {
27796                "TEXT"
27797            }
27798            fn index_dtype_sql(&self, _index: &Index) -> &'static str {
27799                "TEXT"
27800            }
27801            fn supports_schemas(&self) -> bool {
27802                true
27803            }
27804            fn list_views(&self, schema: Option<&str>) -> Result<Vec<String>, IoError> {
27805                Ok(match schema {
27806                    Some("reporting") => vec!["weekly_summary".to_owned()],
27807                    _ => vec![],
27808                })
27809            }
27810            fn table_schema(
27811                &self,
27812                table: &str,
27813                schema: Option<&str>,
27814            ) -> Result<Option<SqlTableSchema>, IoError> {
27815                if table == "weekly_summary" && schema == Some("reporting") {
27816                    Ok(Some(SqlTableSchema {
27817                        table_name: "weekly_summary".to_owned(),
27818                        columns: vec![SqlColumnSchema {
27819                            name: "week".to_owned(),
27820                            declared_type: Some("DATE".to_owned()),
27821                            nullable: true,
27822                            default_value: None,
27823                            primary_key_ordinal: None,
27824                            comment: None,
27825                            autoincrement: false,
27826                        }],
27827                    }))
27828                } else {
27829                    Ok(None)
27830                }
27831            }
27832        }
27833        let conn = MultiSchemaViewReflect;
27834        let inspector = SqlInspector::new(&conn);
27835        let bundles = inspector.reflect_all_views(Some("reporting")).unwrap();
27836        assert_eq!(bundles.len(), 1);
27837        assert_eq!(bundles[0].table_name, "weekly_summary");
27838        assert_eq!(bundles[0].columns[0].declared_type.as_deref(), Some("DATE"));
27839        // Wrong schema -> empty.
27840        assert!(
27841            inspector
27842                .reflect_all_views(Some("audit"))
27843                .unwrap()
27844                .is_empty()
27845        );
27846    }
27847
27848    #[test]
27849    fn sql_inspector_reflect_table_calls_table_schema_only_once() {
27850        // Per fd90.43: reflect_table must derive primary_key_columns
27851        // from the fetched SqlTableSchema rather than dispatching
27852        // primary_key_columns() (which itself calls table_schema). A
27853        // recording stub counts table_schema invocations and asserts
27854        // exactly one round-trip.
27855        use std::cell::Cell;
27856        struct CountingTableSchema {
27857            table_schema_calls: Cell<usize>,
27858        }
27859        impl super::SqlConnection for CountingTableSchema {
27860            fn query(&self, _q: &str, _p: &[Scalar]) -> Result<SqlQueryResult, IoError> {
27861                Ok(SqlQueryResult {
27862                    columns: vec![],
27863                    rows: vec![],
27864                })
27865            }
27866            fn execute_batch(&self, _sql: &str) -> Result<(), IoError> {
27867                Ok(())
27868            }
27869            fn table_exists(&self, _name: &str) -> Result<bool, IoError> {
27870                Ok(false)
27871            }
27872            fn insert_rows(&self, _sql: &str, _rows: &[Vec<Scalar>]) -> Result<(), IoError> {
27873                Ok(())
27874            }
27875            fn dtype_sql(&self, _dtype: DType) -> &'static str {
27876                "TEXT"
27877            }
27878            fn index_dtype_sql(&self, _index: &Index) -> &'static str {
27879                "TEXT"
27880            }
27881            fn table_schema(
27882                &self,
27883                _table: &str,
27884                _schema: Option<&str>,
27885            ) -> Result<Option<SqlTableSchema>, IoError> {
27886                self.table_schema_calls
27887                    .set(self.table_schema_calls.get() + 1);
27888                Ok(Some(SqlTableSchema {
27889                    table_name: "x".to_owned(),
27890                    columns: vec![SqlColumnSchema {
27891                        name: "id".to_owned(),
27892                        declared_type: Some("BIGINT".to_owned()),
27893                        nullable: false,
27894                        default_value: None,
27895                        primary_key_ordinal: Some(0),
27896                        comment: None,
27897                        autoincrement: true,
27898                    }],
27899                }))
27900            }
27901        }
27902        let conn = CountingTableSchema {
27903            table_schema_calls: Cell::new(0),
27904        };
27905        let inspector = SqlInspector::new(&conn);
27906        let bundle = inspector.reflect_table("x", None).unwrap().unwrap();
27907        // Exactly one table_schema fetch — primary_key_columns derived
27908        // from the fetched meta, NOT a second round-trip.
27909        assert_eq!(conn.table_schema_calls.get(), 1);
27910        assert_eq!(bundle.primary_key_columns, vec!["id"]);
27911    }
27912
27913    // ── SqlReadOptions default coerce_float (br-o0x6 / fd90.41) ──────────
27914
27915    #[test]
27916    fn sql_read_options_default_coerce_float_matches_pandas() {
27917        // Pandas defaults coerce_float=True for read_sql / read_sql_query
27918        // / read_sql_table. We must match — any bare ::default() call
27919        // should opt INTO coerce_float, not opt out.
27920        let opts = SqlReadOptions::default();
27921        assert!(
27922            opts.coerce_float,
27923            "default coerce_float must be true (pandas parity)"
27924        );
27925        // Sanity: other defaults are the natural empty / None values.
27926        assert!(opts.params.is_none());
27927        assert!(opts.parse_dates.is_none());
27928        assert!(opts.dtype.is_none());
27929        assert!(opts.schema.is_none());
27930        assert!(opts.columns.is_none());
27931        assert!(opts.index_col.is_none());
27932    }
27933
27934    // ── SQL identifier quoting regression matrix (br-frankenpandas-fd90.12) ─
27935    //
27936    // Cross-product of (ANSI / MySQL backtick / MSSQL bracket) quoting
27937    // backends × (SELECT * / SELECT cols / CREATE TABLE / INSERT /
27938    // multi-row INSERT / DROP / TRUNCATE) × identifier shapes that the
27939    // shared validator currently allows: reserved-word names, mixed case,
27940    // leading digits, embedded quote chars (where the backend defines an
27941    // escape rule). All tests are pure query-builder assertions — no live
27942    // backend touched.
27943
27944    #[derive(Default)]
27945    struct AnsiSchemaConn;
27946    impl super::SqlConnection for AnsiSchemaConn {
27947        fn query(&self, _q: &str, _p: &[Scalar]) -> Result<super::SqlQueryResult, IoError> {
27948            Ok(super::SqlQueryResult {
27949                columns: vec![],
27950                rows: vec![],
27951            })
27952        }
27953        fn execute_batch(&self, _sql: &str) -> Result<(), IoError> {
27954            Ok(())
27955        }
27956        fn table_exists(&self, _n: &str) -> Result<bool, IoError> {
27957            Ok(false)
27958        }
27959        fn insert_rows(&self, _s: &str, _r: &[Vec<Scalar>]) -> Result<(), IoError> {
27960            Ok(())
27961        }
27962        fn dtype_sql(&self, _d: DType) -> &'static str {
27963            "TEXT"
27964        }
27965        fn index_dtype_sql(&self, _i: &Index) -> &'static str {
27966            "TEXT"
27967        }
27968        fn supports_schemas(&self) -> bool {
27969            true
27970        }
27971        fn parameter_marker(&self, ordinal: usize) -> String {
27972            format!("${ordinal}")
27973        }
27974        fn max_identifier_length(&self) -> Option<usize> {
27975            Some(63)
27976        }
27977    }
27978
27979    #[derive(Default)]
27980    struct MysqlBacktickConn;
27981    impl super::SqlConnection for MysqlBacktickConn {
27982        fn query(&self, _q: &str, _p: &[Scalar]) -> Result<super::SqlQueryResult, IoError> {
27983            Ok(super::SqlQueryResult {
27984                columns: vec![],
27985                rows: vec![],
27986            })
27987        }
27988        fn execute_batch(&self, _sql: &str) -> Result<(), IoError> {
27989            Ok(())
27990        }
27991        fn table_exists(&self, _n: &str) -> Result<bool, IoError> {
27992            Ok(false)
27993        }
27994        fn insert_rows(&self, _s: &str, _r: &[Vec<Scalar>]) -> Result<(), IoError> {
27995            Ok(())
27996        }
27997        fn dtype_sql(&self, _d: DType) -> &'static str {
27998            "TEXT"
27999        }
28000        fn index_dtype_sql(&self, _i: &Index) -> &'static str {
28001            "TEXT"
28002        }
28003        fn supports_schemas(&self) -> bool {
28004            true
28005        }
28006        fn parameter_marker(&self, _ordinal: usize) -> String {
28007            "?".to_owned()
28008        }
28009        fn max_identifier_length(&self) -> Option<usize> {
28010            Some(64)
28011        }
28012        fn quote_identifier(&self, ident: &str) -> Result<String, IoError> {
28013            if ident.contains('\0') {
28014                return Err(IoError::Sql("invalid SQL identifier: NUL byte".to_owned()));
28015            }
28016            Ok(format!("`{}`", ident.replace('`', "``")))
28017        }
28018    }
28019
28020    #[derive(Default)]
28021    struct MssqlBracketConn;
28022    impl super::SqlConnection for MssqlBracketConn {
28023        fn query(&self, _q: &str, _p: &[Scalar]) -> Result<super::SqlQueryResult, IoError> {
28024            Ok(super::SqlQueryResult {
28025                columns: vec![],
28026                rows: vec![],
28027            })
28028        }
28029        fn execute_batch(&self, _sql: &str) -> Result<(), IoError> {
28030            Ok(())
28031        }
28032        fn table_exists(&self, _n: &str) -> Result<bool, IoError> {
28033            Ok(false)
28034        }
28035        fn insert_rows(&self, _s: &str, _r: &[Vec<Scalar>]) -> Result<(), IoError> {
28036            Ok(())
28037        }
28038        fn dtype_sql(&self, _d: DType) -> &'static str {
28039            "NVARCHAR(MAX)"
28040        }
28041        fn index_dtype_sql(&self, _i: &Index) -> &'static str {
28042            "NVARCHAR(MAX)"
28043        }
28044        fn supports_schemas(&self) -> bool {
28045            true
28046        }
28047        fn parameter_marker(&self, ordinal: usize) -> String {
28048            format!("@p{ordinal}")
28049        }
28050        fn max_identifier_length(&self) -> Option<usize> {
28051            Some(128)
28052        }
28053        fn quote_identifier(&self, ident: &str) -> Result<String, IoError> {
28054            if ident.contains('\0') {
28055                return Err(IoError::Sql("invalid SQL identifier: NUL byte".to_owned()));
28056            }
28057            // T-SQL bracket quoting: doubled `]` escapes a literal `]`.
28058            Ok(format!("[{}]", ident.replace(']', "]]")))
28059        }
28060    }
28061
    /// Reserved SQL keywords that the shape validator allows as
    /// alphanumeric identifiers — they must round-trip through every
    /// query builder safely (i.e. quoted, never bare).
    ///
    /// All entries are lowercase and purely alphabetic; the reserved-word
    /// `SELECT *` matrix test uses each one as a table name against the three
    /// stub dialects.
    const FD90_12_RESERVED_WORDS: &[&str] = &[
        "select", "from", "where", "order", "group", "table", "index", "join",
    ];
28068
28069    #[test]
28070    fn fd90_12_quoting_matrix_select_all_reserved_words_quoted_per_dialect() {
28071        let ansi = AnsiSchemaConn;
28072        let mysql = MysqlBacktickConn;
28073        let mssql = MssqlBracketConn;
28074        for word in FD90_12_RESERVED_WORDS {
28075            assert_eq!(
28076                super::sql_select_all_query(&ansi, word).expect("ansi select"),
28077                format!("SELECT * FROM \"{word}\""),
28078                "ansi reserved word `{word}`"
28079            );
28080            assert_eq!(
28081                super::sql_select_all_query(&mysql, word).expect("mysql select"),
28082                format!("SELECT * FROM `{word}`"),
28083                "mysql reserved word `{word}`"
28084            );
28085            assert_eq!(
28086                super::sql_select_all_query(&mssql, word).expect("mssql select"),
28087                format!("SELECT * FROM [{word}]"),
28088                "mssql reserved word `{word}`"
28089            );
28090        }
28091    }
28092
28093    #[test]
28094    fn fd90_12_quoting_matrix_select_columns_mixed_case_preserved_per_dialect() {
28095        let ansi = AnsiSchemaConn;
28096        let mysql = MysqlBacktickConn;
28097        let mssql = MssqlBracketConn;
28098        let cases: &[&str] = &["MyCol", "MIXEDcase", "camelCase", "SCREAMING_SNAKE"];
28099        for col in cases {
28100            assert_eq!(
28101                super::sql_select_columns_query(&ansi, "users", &[col]).expect("ansi cols"),
28102                format!("SELECT \"{col}\" FROM \"users\""),
28103                "ansi mixed-case col `{col}`"
28104            );
28105            assert_eq!(
28106                super::sql_select_columns_query(&mysql, "users", &[col]).expect("mysql cols"),
28107                format!("SELECT `{col}` FROM `users`"),
28108                "mysql mixed-case col `{col}`"
28109            );
28110            assert_eq!(
28111                super::sql_select_columns_query(&mssql, "users", &[col]).expect("mssql cols"),
28112                format!("SELECT [{col}] FROM [users]"),
28113                "mssql mixed-case col `{col}`"
28114            );
28115        }
28116    }
28117
28118    #[test]
28119    fn fd90_12_quoting_matrix_leading_digit_identifiers_quoted_per_dialect() {
28120        let ansi = AnsiSchemaConn;
28121        let mysql = MysqlBacktickConn;
28122        let mssql = MssqlBracketConn;
28123        let cases: &[&str] = &["1col", "2nd_place", "9lives", "123"];
28124        for col in cases {
28125            assert_eq!(
28126                super::sql_select_columns_query(&ansi, "tbl", &[col]).expect("ansi"),
28127                format!("SELECT \"{col}\" FROM \"tbl\"")
28128            );
28129            assert_eq!(
28130                super::sql_select_columns_query(&mysql, "tbl", &[col]).expect("mysql"),
28131                format!("SELECT `{col}` FROM `tbl`")
28132            );
28133            assert_eq!(
28134                super::sql_select_columns_query(&mssql, "tbl", &[col]).expect("mssql"),
28135                format!("SELECT [{col}] FROM [tbl]")
28136            );
28137        }
28138    }
28139
28140    #[test]
28141    fn fd90_12_quoting_matrix_schema_qualified_select_per_dialect() {
28142        let ansi = AnsiSchemaConn;
28143        let mysql = MysqlBacktickConn;
28144        let mssql = MssqlBracketConn;
28145        assert_eq!(
28146            super::sql_select_all_query_in_schema(&ansi, "users", Some("analytics")).expect("ansi"),
28147            "SELECT * FROM \"analytics\".\"users\""
28148        );
28149        assert_eq!(
28150            super::sql_select_all_query_in_schema(&mysql, "users", Some("analytics"))
28151                .expect("mysql"),
28152            "SELECT * FROM `analytics`.`users`"
28153        );
28154        assert_eq!(
28155            super::sql_select_all_query_in_schema(&mssql, "users", Some("dbo")).expect("mssql"),
28156            "SELECT * FROM [dbo].[users]"
28157        );
28158    }
28159
28160    #[test]
28161    fn fd90_12_quoting_matrix_create_table_per_dialect() {
28162        let ansi = AnsiSchemaConn;
28163        let mysql = MysqlBacktickConn;
28164        let mssql = MssqlBracketConn;
28165        let cols = vec![
28166            super::sql_column_definition(&ansi, "id", "BIGINT").expect("ansi col"),
28167            super::sql_column_definition(&ansi, "select", "TEXT").expect("ansi reserved col"),
28168        ];
28169        assert_eq!(
28170            super::sql_create_table_query_in_schema(&ansi, "events", Some("public"), &cols)
28171                .expect("ansi create"),
28172            "CREATE TABLE IF NOT EXISTS \"public\".\"events\" (\"id\" BIGINT, \"select\" TEXT)"
28173        );
28174        let mysql_cols = vec![
28175            super::sql_column_definition(&mysql, "id", "BIGINT").expect("mysql col"),
28176            super::sql_column_definition(&mysql, "select", "TEXT").expect("mysql reserved col"),
28177        ];
28178        assert_eq!(
28179            super::sql_create_table_query_in_schema(
28180                &mysql,
28181                "events",
28182                Some("analytics"),
28183                &mysql_cols
28184            )
28185            .expect("mysql create"),
28186            "CREATE TABLE IF NOT EXISTS `analytics`.`events` (`id` BIGINT, `select` TEXT)"
28187        );
28188        let mssql_cols = vec![
28189            super::sql_column_definition(&mssql, "id", "BIGINT").expect("mssql col"),
28190            super::sql_column_definition(&mssql, "select", "NVARCHAR(MAX)")
28191                .expect("mssql reserved col"),
28192        ];
28193        assert_eq!(
28194            super::sql_create_table_query_in_schema(&mssql, "events", Some("dbo"), &mssql_cols)
28195                .expect("mssql create"),
28196            "CREATE TABLE IF NOT EXISTS [dbo].[events] ([id] BIGINT, [select] NVARCHAR(MAX))"
28197        );
28198    }
28199
28200    #[test]
28201    fn fd90_12_quoting_matrix_insert_per_dialect_with_param_markers() {
28202        let ansi = AnsiSchemaConn;
28203        let mysql = MysqlBacktickConn;
28204        let mssql = MssqlBracketConn;
28205        let cols = vec!["id".to_owned(), "MixedCase".to_owned(), "select".to_owned()];
28206        assert_eq!(
28207            super::sql_insert_rows_query_in_schema(&ansi, "events", Some("public"), &cols)
28208                .expect("ansi insert"),
28209            "INSERT INTO \"public\".\"events\" (\"id\", \"MixedCase\", \"select\") VALUES ($1, $2, $3)"
28210        );
28211        assert_eq!(
28212            super::sql_insert_rows_query_in_schema(&mysql, "events", Some("analytics"), &cols)
28213                .expect("mysql insert"),
28214            "INSERT INTO `analytics`.`events` (`id`, `MixedCase`, `select`) VALUES (?, ?, ?)"
28215        );
28216        assert_eq!(
28217            super::sql_insert_rows_query_in_schema(&mssql, "events", Some("dbo"), &cols)
28218                .expect("mssql insert"),
28219            "INSERT INTO [dbo].[events] ([id], [MixedCase], [select]) VALUES (@p1, @p2, @p3)"
28220        );
28221    }
28222
28223    #[test]
28224    fn fd90_12_quoting_matrix_multi_row_insert_param_ordinals_span_rows() {
28225        let ansi = AnsiSchemaConn;
28226        let mysql = MysqlBacktickConn;
28227        let cols = vec!["a".to_owned(), "b".to_owned()];
28228        assert_eq!(
28229            super::sql_multi_row_insert_query_in_schema(&ansi, "tbl", None, &cols, 2)
28230                .expect("ansi multi"),
28231            "INSERT INTO \"tbl\" (\"a\", \"b\") VALUES ($1, $2), ($3, $4)"
28232        );
28233        assert_eq!(
28234            super::sql_multi_row_insert_query_in_schema(&mysql, "tbl", None, &cols, 2)
28235                .expect("mysql multi"),
28236            "INSERT INTO `tbl` (`a`, `b`) VALUES (?, ?), (?, ?)"
28237        );
28238    }
28239
28240    #[test]
28241    fn fd90_12_quoting_matrix_drop_table_per_dialect() {
28242        let ansi = AnsiSchemaConn;
28243        let mysql = MysqlBacktickConn;
28244        let mssql = MssqlBracketConn;
28245        assert_eq!(
28246            super::sql_drop_table_query_in_schema(&ansi, "events", Some("public"))
28247                .expect("ansi drop"),
28248            "DROP TABLE IF EXISTS \"public\".\"events\""
28249        );
28250        assert_eq!(
28251            super::sql_drop_table_query_in_schema(&mysql, "events", Some("analytics"))
28252                .expect("mysql drop"),
28253            "DROP TABLE IF EXISTS `analytics`.`events`"
28254        );
28255        assert_eq!(
28256            super::sql_drop_table_query_in_schema(&mssql, "events", Some("dbo"))
28257                .expect("mssql drop"),
28258            "DROP TABLE IF EXISTS [dbo].[events]"
28259        );
28260    }
28261
28262    #[test]
28263    fn fd90_12_quoting_matrix_truncate_uses_default_delete_with_per_dialect_quoting() {
28264        // Default truncate_table impl emits `DELETE FROM <quoted>` and
28265        // routes through the backend's quote_identifier.
28266        #[derive(Default)]
28267        struct CapturingAnsi {
28268            captured: std::cell::RefCell<Vec<String>>,
28269        }
28270        impl super::SqlConnection for CapturingAnsi {
28271            fn query(&self, _q: &str, _p: &[Scalar]) -> Result<super::SqlQueryResult, IoError> {
28272                Ok(super::SqlQueryResult {
28273                    columns: vec![],
28274                    rows: vec![],
28275                })
28276            }
28277            fn execute_batch(&self, sql: &str) -> Result<(), IoError> {
28278                self.captured.borrow_mut().push(sql.to_owned());
28279                Ok(())
28280            }
28281            fn table_exists(&self, _n: &str) -> Result<bool, IoError> {
28282                Ok(false)
28283            }
28284            fn insert_rows(&self, _s: &str, _r: &[Vec<Scalar>]) -> Result<(), IoError> {
28285                Ok(())
28286            }
28287            fn dtype_sql(&self, _d: DType) -> &'static str {
28288                "TEXT"
28289            }
28290            fn index_dtype_sql(&self, _i: &Index) -> &'static str {
28291                "TEXT"
28292            }
28293            fn supports_schemas(&self) -> bool {
28294                true
28295            }
28296        }
28297        let ansi = CapturingAnsi::default();
28298        super::SqlConnection::truncate_table(&ansi, "events", Some("public"))
28299            .expect("ansi truncate");
28300        assert_eq!(
28301            ansi.captured.borrow().as_slice(),
28302            &["DELETE FROM \"public\".\"events\"".to_owned()]
28303        );
28304
28305        #[derive(Default)]
28306        struct CapturingMysql {
28307            captured: std::cell::RefCell<Vec<String>>,
28308        }
28309        impl super::SqlConnection for CapturingMysql {
28310            fn query(&self, _q: &str, _p: &[Scalar]) -> Result<super::SqlQueryResult, IoError> {
28311                Ok(super::SqlQueryResult {
28312                    columns: vec![],
28313                    rows: vec![],
28314                })
28315            }
28316            fn execute_batch(&self, sql: &str) -> Result<(), IoError> {
28317                self.captured.borrow_mut().push(sql.to_owned());
28318                Ok(())
28319            }
28320            fn table_exists(&self, _n: &str) -> Result<bool, IoError> {
28321                Ok(false)
28322            }
28323            fn insert_rows(&self, _s: &str, _r: &[Vec<Scalar>]) -> Result<(), IoError> {
28324                Ok(())
28325            }
28326            fn dtype_sql(&self, _d: DType) -> &'static str {
28327                "TEXT"
28328            }
28329            fn index_dtype_sql(&self, _i: &Index) -> &'static str {
28330                "TEXT"
28331            }
28332            fn supports_schemas(&self) -> bool {
28333                true
28334            }
28335            fn quote_identifier(&self, ident: &str) -> Result<String, IoError> {
28336                if ident.contains('\0') {
28337                    return Err(IoError::Sql("invalid SQL identifier: NUL byte".to_owned()));
28338                }
28339                Ok(format!("`{}`", ident.replace('`', "``")))
28340            }
28341        }
28342        let mysql = CapturingMysql::default();
28343        super::SqlConnection::truncate_table(&mysql, "events", Some("analytics"))
28344            .expect("mysql truncate");
28345        assert_eq!(
28346            mysql.captured.borrow().as_slice(),
28347            &["DELETE FROM `analytics`.`events`".to_owned()]
28348        );
28349    }
28350
28351    #[test]
28352    fn fd90_12_quoting_matrix_embedded_quote_chars_doubled_per_dialect() {
28353        // Embedded quote chars must be doubled per the dialect's escape
28354        // rule. quote_identifier is exposed for column names which may
28355        // legitimately contain embedded quotes (sql_column_definition
28356        // takes any string).
28357        let ansi = AnsiSchemaConn;
28358        let mysql = MysqlBacktickConn;
28359        let mssql = MssqlBracketConn;
28360        use super::SqlConnection as _;
28361        assert_eq!(ansi.quote_identifier("a\"b").expect("ansi"), "\"a\"\"b\"");
28362        assert_eq!(mysql.quote_identifier("a`b").expect("mysql"), "`a``b`");
28363        assert_eq!(mssql.quote_identifier("a]b").expect("mssql"), "[a]]b]");
28364        // Cross-dialect non-escape: ANSI doesn't escape backticks, etc.
28365        assert_eq!(
28366            ansi.quote_identifier("a`b")
28367                .expect("ansi backtick passthrough"),
28368            "\"a`b\""
28369        );
28370        assert_eq!(
28371            mysql
28372                .quote_identifier("a\"b")
28373                .expect("mysql quote passthrough"),
28374            "`a\"b`"
28375        );
28376        assert_eq!(
28377            mssql
28378                .quote_identifier("a\"b")
28379                .expect("mssql quote passthrough"),
28380            "[a\"b]"
28381        );
28382    }
28383
28384    #[test]
28385    fn fd90_12_quoting_matrix_long_identifier_within_cap_succeeds_over_cap_rejected() {
28386        // PG cap = 63, MySQL cap = 64, MSSQL cap = 128.
28387        use super::SqlConnection as _;
28388        let ansi = AnsiSchemaConn;
28389        let mysql = MysqlBacktickConn;
28390        let mssql = MssqlBracketConn;
28391        let pg63 = "a".repeat(63);
28392        let pg64 = "a".repeat(64);
28393        let mysql64 = "b".repeat(64);
28394        let mysql65 = "b".repeat(65);
28395        let mssql128 = "c".repeat(128);
28396        let mssql129 = "c".repeat(129);
28397
28398        super::validate_sql_identifier_length(&pg63, ansi.max_identifier_length(), "table")
28399            .expect("pg 63 ok");
28400        super::validate_sql_identifier_length(&mysql64, mysql.max_identifier_length(), "table")
28401            .expect("mysql 64 ok");
28402        super::validate_sql_identifier_length(&mssql128, mssql.max_identifier_length(), "table")
28403            .expect("mssql 128 ok");
28404
28405        let err =
28406            super::validate_sql_identifier_length(&pg64, ansi.max_identifier_length(), "table")
28407                .expect_err("pg 64 over cap");
28408        assert!(matches!(err, IoError::Sql(msg) if msg.contains("63") && msg.contains("table")));
28409        let err =
28410            super::validate_sql_identifier_length(&mysql65, mysql.max_identifier_length(), "table")
28411                .expect_err("mysql 65 over cap");
28412        assert!(matches!(err, IoError::Sql(msg) if msg.contains("64")));
28413        let err = super::validate_sql_identifier_length(
28414            &mssql129,
28415            mssql.max_identifier_length(),
28416            "table",
28417        )
28418        .expect_err("mssql 129 over cap");
28419        assert!(matches!(err, IoError::Sql(msg) if msg.contains("128")));
28420    }
28421
28422    #[test]
28423    fn fd90_12_query_builders_enforce_identifier_length_caps() {
28424        fn assert_length_error(err: IoError, kind: &str) {
28425            assert!(
28426                matches!(&err, IoError::Sql(msg)
28427                    if msg.contains(kind)
28428                        && msg.contains("63")
28429                        && msg.contains("backend identifier limit")),
28430                "expected SQL identifier-length error for {kind}, got {err:?}"
28431            );
28432        }
28433
28434        let conn = AnsiSchemaConn;
28435        let over_cap = "a".repeat(64);
28436        let cols = vec![over_cap.clone()];
28437        let defs = vec!["id BIGINT".to_owned()];
28438
28439        assert_length_error(
28440            super::sql_select_all_query_in_schema(&conn, &over_cap, None)
28441                .expect_err("SELECT * table over cap"),
28442            "table",
28443        );
28444        assert_length_error(
28445            super::sql_select_all_query_in_schema(&conn, "events", Some(&over_cap))
28446                .expect_err("SELECT * schema over cap"),
28447            "schema",
28448        );
28449        assert_length_error(
28450            super::sql_select_columns_query_in_schema(&conn, "events", None, &[over_cap.as_str()])
28451                .expect_err("SELECT column over cap"),
28452            "column",
28453        );
28454        assert_length_error(
28455            super::sql_create_table_query_in_schema(&conn, &over_cap, None, &defs)
28456                .expect_err("CREATE table over cap"),
28457            "table",
28458        );
28459        assert_length_error(
28460            super::sql_insert_rows_query_in_schema(&conn, "events", None, &cols)
28461                .expect_err("INSERT column over cap"),
28462            "column",
28463        );
28464        assert_length_error(
28465            super::sql_multi_row_insert_query_in_schema(&conn, "events", None, &cols, 1)
28466                .expect_err("multi-row INSERT column over cap"),
28467            "column",
28468        );
28469        assert_length_error(
28470            super::sql_drop_table_query_in_schema(&conn, &over_cap, None)
28471                .expect_err("DROP table over cap"),
28472            "table",
28473        );
28474        assert_length_error(
28475            super::SqlConnection::truncate_table(&conn, &over_cap, None)
28476                .expect_err("TRUNCATE fallback table over cap"),
28477            "table",
28478        );
28479    }
28480
28481    #[test]
28482    fn fd90_12_quoting_matrix_special_characters_rejected_by_validator() {
28483        // Per validate_sql_ident: only alphanumeric + underscore allowed.
28484        // Special chars (`-`, `.`, ` `, `:`, `'`, `"`, `;`) and dotted
28485        // names must be rejected before they ever reach quote_identifier.
28486        let bad: &[&str] = &[
28487            "my-col",
28488            "my.col",
28489            "my col",
28490            "my:col",
28491            "my'col",
28492            "my\"col",
28493            "my;col",
28494            "schema.table",
28495            "DROP--",
28496            "",
28497        ];
28498        for name in bad {
28499            let err =
28500                super::validate_sql_table_name(name).expect_err(&format!("must reject `{name}`"));
28501            assert!(matches!(err, IoError::Sql(msg) if msg.contains("invalid table name")));
28502            let err = super::validate_sql_column_name(name)
28503                .expect_err(&format!("must reject col `{name}`"));
28504            assert!(matches!(err, IoError::Sql(msg) if msg.contains("invalid column name")));
28505        }
28506    }
28507
28508    #[test]
28509    fn fd90_12_quoting_matrix_nul_byte_rejected_at_quote_identifier_layer() {
28510        // Defense in depth: even if a backend's quote_identifier is
28511        // called with a NUL-containing string (bypassing
28512        // validate_sql_ident), every dialect must reject — guards
28513        // against C-string driver-layer statement injection via
28514        // embedded null terminators.
28515        use super::SqlConnection as _;
28516        let ansi = AnsiSchemaConn;
28517        let mysql = MysqlBacktickConn;
28518        let mssql = MssqlBracketConn;
28519        let err_ansi = ansi
28520            .quote_identifier("ab\0cd")
28521            .expect_err("ansi must reject NUL");
28522        assert!(matches!(err_ansi, IoError::Sql(msg) if msg.contains("NUL")));
28523        let err_mysql = mysql
28524            .quote_identifier("ab\0cd")
28525            .expect_err("mysql must reject NUL");
28526        assert!(matches!(err_mysql, IoError::Sql(msg) if msg.contains("NUL")));
28527        let err_mssql = mssql
28528            .quote_identifier("ab\0cd")
28529            .expect_err("mssql must reject NUL");
28530        assert!(matches!(err_mssql, IoError::Sql(msg) if msg.contains("NUL")));
28531    }
28532
28533    #[cfg(feature = "sql-sqlite")]
28534    #[test]
28535    fn read_sql_empty_typed_table_preserves_column_dtypes_ex8ec() {
28536        let conn = make_sql_test_conn();
28537        super::SqlConnection::execute_batch(
28538            &conn,
28539            "CREATE TABLE empty_typed_ex8ec (i INTEGER, t TEXT, r REAL);",
28540        )
28541        .expect("create");
28542        // No INSERTs — empty result set.
28543
28544        let frame = read_sql(&conn, "SELECT * FROM empty_typed_ex8ec").expect("read empty");
28545        assert_eq!(frame.index().len(), 0, "empty table should yield zero rows");
28546
28547        let i_col = frame.column("i").expect("column i must exist");
28548        assert_eq!(i_col.dtype(), crate::DType::Int64);
28549        let t_col = frame.column("t").expect("column t must exist");
28550        assert_eq!(t_col.dtype(), crate::DType::Utf8);
28551        let r_col = frame.column("r").expect("column r must exist");
28552        assert_eq!(r_col.dtype(), crate::DType::Float64);
28553    }
28554
28555    #[cfg(feature = "sql-sqlite")]
28556    #[test]
28557    fn read_sql_all_null_typed_table_preserves_column_dtypes_0qo9c() {
28558        let conn = make_sql_test_conn();
28559        super::SqlConnection::execute_batch(
28560            &conn,
28561            "CREATE TABLE all_null_typed_0qo9c (i INTEGER, t TEXT, r REAL);
28562             INSERT INTO all_null_typed_0qo9c (i, t, r) VALUES (NULL, NULL, NULL);",
28563        )
28564        .expect("create and insert");
28565
28566        let frame = read_sql(&conn, "SELECT * FROM all_null_typed_0qo9c")
28567            .expect("read all-null typed table");
28568        assert_eq!(frame.index().len(), 1);
28569
28570        let i_col = frame.column("i").expect("column i must exist");
28571        assert_eq!(i_col.dtype(), crate::DType::Int64);
28572        assert!(i_col.values()[0].is_missing());
28573        let t_col = frame.column("t").expect("column t must exist");
28574        assert_eq!(t_col.dtype(), crate::DType::Utf8);
28575        assert!(t_col.values()[0].is_missing());
28576        let r_col = frame.column("r").expect("column r must exist");
28577        assert_eq!(r_col.dtype(), crate::DType::Float64);
28578        assert!(r_col.values()[0].is_missing());
28579    }
28580}
28581
#[cfg(test)]
mod fused_numeric_csv_field_tests {
    use super::{
        CsvTypedColumnValues, fuse_scan_numeric_csv_field, push_csv_default_numeric_field,
        push_fused_numeric_csv_field,
    };

    /// Panics unless `base` and `fused` are the same column variant with the
    /// same contents. Float columns are compared by raw bit pattern, so a
    /// difference in the sign of zero (or between NaN payloads) counts as a
    /// mismatch.
    fn assert_same_columns(base: &CsvTypedColumnValues, fused: &CsvTypedColumnValues, token: &str) {
        use CsvTypedColumnValues::{Float64, Int64};

        match (base, fused) {
            (Int64(expected), Int64(actual)) => {
                assert_eq!(expected, actual, "Int64 mismatch for token {token:?}");
            }
            (Float64(expected), Float64(actual)) => {
                let expected_bits = expected
                    .iter()
                    .map(|value| value.to_bits())
                    .collect::<Vec<u64>>();
                let actual_bits = actual
                    .iter()
                    .map(|value| value.to_bits())
                    .collect::<Vec<u64>>();
                assert_eq!(
                    expected_bits, actual_bits,
                    "Float64 bit mismatch for token {token:?}"
                );
            }
            _ => panic!("column dtype diverged for token {token:?}"),
        }
    }

    /// Differential test: whenever the fused scanner admits a token, pushing
    /// the fused field must leave a column in exactly the state that
    /// `push_csv_default_numeric_field` produces for the same token — checked
    /// from both an empty Int64 and an empty Float64 starting column, and
    /// with every terminator that can follow the field.
    #[test]
    fn fused_field_matches_fallback_parser() {
        const TOKENS: &[&str] = &[
            "0",
            "-0",
            "+7",
            "007",
            "5",
            "-5",
            "123456789",
            "999999999999999999",
            "-999999999999999999",
            "9007199254740993",
            "1234567890123456789",
            "0.1",
            "-0.1",
            "+0.5",
            ".5",
            "-.5",
            "00.5",
            "5.",
            "12.34",
            "123456.7",
            "0.30000000000000004",
            "9007199254740993.0",
            "1e5",
            "1E5",
            "1.5e-3",
            "inf",
            "-inf",
            "nan",
            "NaN",
            "",
            " 5",
            "5 ",
            "true",
            "false",
            "TRUE",
            "1.2.3",
            "--5",
            "+-5",
            "1-2",
            "#N/A",
            "NULL",
            "abc",
        ];
        // End of input, field delimiter, row end, and "another field follows".
        const TERMINATORS: [&str; 4] = ["", ",", "\n", ",9\n"];

        for &token in TOKENS {
            for terminator in TERMINATORS {
                let data = [token, terminator].concat();
                let field = match fuse_scan_numeric_csv_field(data.as_bytes(), 0) {
                    Some(field) => field,
                    // Rejected tokens take the unchanged fallback route, so
                    // there is nothing to compare.
                    None => continue,
                };
                assert_eq!(
                    field.end,
                    token.len(),
                    "fused scanner must stop at the delimiter for token {token:?}"
                );

                // Run the same comparison from an Int64 and then a Float64
                // starting column. The fallback must accept every token the
                // fused scanner admits (the fused grammar is a strict subset).
                for start_as_float in [false, true] {
                    let fresh_column = || {
                        if start_as_float {
                            CsvTypedColumnValues::Float64(Vec::new())
                        } else {
                            CsvTypedColumnValues::Int64(Vec::new())
                        }
                    };
                    let label = if start_as_float {
                        "fallback Float64"
                    } else {
                        "fallback"
                    };

                    let mut via_fallback = fresh_column();
                    let accepted =
                        push_csv_default_numeric_field(&mut via_fallback, token.as_bytes());
                    assert!(accepted, "{label} rejected fused-admitted token {token:?}");

                    let mut via_fused = fresh_column();
                    push_fused_numeric_csv_field(&mut via_fused, &field);
                    assert_same_columns(&via_fallback, &via_fused, token);
                }
            }
        }
    }

    /// The token `-0` must keep its sign on the float side of the fused
    /// field: the integer value is 0, while the float value carries the bit
    /// pattern of -0.0, and that pattern must survive a push into a Float64
    /// column.
    #[test]
    fn fused_field_preserves_negative_zero_bits() {
        let negative_zero_bits = (-0.0f64).to_bits();

        let field = fuse_scan_numeric_csv_field(b"-0,", 0).expect("fused scanner must admit -0");
        assert_eq!(field.int_value, Some(0));
        assert_eq!(field.float_value.to_bits(), negative_zero_bits);

        let mut float_column = CsvTypedColumnValues::Float64(Vec::new());
        push_fused_numeric_csv_field(&mut float_column, &field);
        let CsvTypedColumnValues::Float64(values) = float_column else {
            panic!("expected Float64 column")
        };
        assert_eq!(values.len(), 1);
        assert_eq!(values[0].to_bits(), negative_zero_bits);
    }
}
28713
28714#[cfg(test)]
28715mod merge_simple_numeric_csv_chunks_tests {
28716    use super::{CsvTypedColumnValues, merge_simple_numeric_csv_chunks};
28717
28718    /// Verbatim copy of the pre-parallel chunk-major merge, kept as the
28719    /// reference implementation for the differential test below.
28720    fn reference_merge(
28721        parsed_chunks: Vec<(Vec<CsvTypedColumnValues>, i64)>,
28722        header_count: usize,
28723    ) -> Option<(Vec<CsvTypedColumnValues>, i64)> {
28724        let mut final_is_float = vec![false; header_count];
28725        let mut row_count = 0i64;
28726        for (columns, rows) in &parsed_chunks {
28727            if columns.len() != header_count {
28728                return None;
28729            }
28730            row_count = row_count.checked_add(*rows)?;
28731            for (idx, column) in columns.iter().enumerate() {
28732                final_is_float[idx] |= matches!(column, CsvTypedColumnValues::Float64(_));
28733            }
28734        }
28735
28736        let capacity = usize::try_from(row_count).ok()?;
28737        let mut merged: Vec<CsvTypedColumnValues> = final_is_float
28738            .into_iter()
28739            .map(|is_float| {
28740                if is_float {
28741                    CsvTypedColumnValues::Float64(Vec::with_capacity(capacity))
28742                } else {
28743                    CsvTypedColumnValues::Int64(Vec::with_capacity(capacity))
28744                }
28745            })
28746            .collect();
28747
28748        for (columns, _) in parsed_chunks {
28749            for (dst, src) in merged.iter_mut().zip(columns) {
28750                match (dst, src) {
28751                    (CsvTypedColumnValues::Int64(dst), CsvTypedColumnValues::Int64(src)) => {
28752                        dst.extend(src);
28753                    }
28754                    (CsvTypedColumnValues::Float64(dst), CsvTypedColumnValues::Int64(src)) => {
28755                        dst.extend(src.into_iter().map(|value| value as f64));
28756                    }
28757                    (CsvTypedColumnValues::Float64(dst), CsvTypedColumnValues::Float64(src)) => {
28758                        dst.extend(src);
28759                    }
28760                    (CsvTypedColumnValues::Int64(_), CsvTypedColumnValues::Float64(_)) => {
28761                        return None;
28762                    }
28763                }
28764            }
28765        }
28766
28767        Some((merged, row_count))
28768    }
28769
28770    fn build_chunks(
28771        chunk_count: usize,
28772        rows_per_chunk: usize,
28773        header_count: usize,
28774        float_from_chunk_for_col: impl Fn(usize) -> usize,
28775    ) -> Vec<(Vec<CsvTypedColumnValues>, i64)> {
28776        (0..chunk_count)
28777            .map(|chunk| {
28778                let columns = (0..header_count)
28779                    .map(|col| {
28780                        if chunk >= float_from_chunk_for_col(col) {
28781                            CsvTypedColumnValues::Float64(
28782                                (0..rows_per_chunk)
28783                                    .map(|row| {
28784                                        (chunk * rows_per_chunk + row) as f64 * 0.5
28785                                            + col as f64 * 1000.0
28786                                    })
28787                                    .collect(),
28788                            )
28789                        } else {
28790                            CsvTypedColumnValues::Int64(
28791                                (0..rows_per_chunk)
28792                                    .map(|row| {
28793                                        (chunk * rows_per_chunk + row) as i64 + col as i64 * 1000
28794                                    })
28795                                    .collect(),
28796                            )
28797                        }
28798                    })
28799                    .collect();
28800                (columns, rows_per_chunk as i64)
28801            })
28802            .collect()
28803    }
28804
28805    fn assert_merge_matches_reference(
28806        chunks: Vec<(Vec<CsvTypedColumnValues>, i64)>,
28807        header_count: usize,
28808    ) {
28809        let reference_chunks: Vec<(Vec<CsvTypedColumnValues>, i64)> = chunks
28810            .iter()
28811            .map(|(columns, rows)| {
28812                let cloned = columns
28813                    .iter()
28814                    .map(|column| match column {
28815                        CsvTypedColumnValues::Int64(values) => {
28816                            CsvTypedColumnValues::Int64(values.clone())
28817                        }
28818                        CsvTypedColumnValues::Float64(values) => {
28819                            CsvTypedColumnValues::Float64(values.clone())
28820                        }
28821                    })
28822                    .collect();
28823                (cloned, *rows)
28824            })
28825            .collect();
28826
28827        let expected = reference_merge(reference_chunks, header_count);
28828        let actual = merge_simple_numeric_csv_chunks(chunks, header_count);
28829
28830        match (expected, actual) {
28831            (None, None) => {}
28832            (Some((expected_columns, expected_rows)), Some((actual_columns, actual_rows))) => {
28833                assert_eq!(expected_rows, actual_rows);
28834                assert_eq!(expected_columns.len(), actual_columns.len());
28835                for (idx, (lhs, rhs)) in expected_columns
28836                    .iter()
28837                    .zip(actual_columns.iter())
28838                    .enumerate()
28839                {
28840                    match (lhs, rhs) {
28841                        (CsvTypedColumnValues::Int64(lhs), CsvTypedColumnValues::Int64(rhs)) => {
28842                            assert_eq!(lhs, rhs, "Int64 column {idx} diverged");
28843                        }
28844                        (
28845                            CsvTypedColumnValues::Float64(lhs),
28846                            CsvTypedColumnValues::Float64(rhs),
28847                        ) => {
28848                            let lhs_bits: Vec<u64> =
28849                                lhs.iter().map(|value| value.to_bits()).collect();
28850                            let rhs_bits: Vec<u64> =
28851                                rhs.iter().map(|value| value.to_bits()).collect();
28852                            assert_eq!(lhs_bits, rhs_bits, "Float64 column {idx} bits diverged");
28853                        }
28854                        _ => panic!("column {idx} dtype diverged"),
28855                    }
28856                }
28857            }
28858            (expected, actual) => panic!(
28859                "merge outcome diverged: reference some={} actual some={}",
28860                expected.is_some(),
28861                actual.is_some()
28862            ),
28863        }
28864    }
28865
28866    /// Big merge (crosses the parallel threshold): mixed promotion patterns —
28867    /// some columns all-Int64, some all-Float64, some promoting mid-stream —
28868    /// must match the chunk-major reference bit-for-bit, in column order.
28869    #[test]
28870    fn parallel_merge_matches_reference_with_mixed_promotion() {
28871        // 8 chunks x 1000 rows x 12 columns = 96000 values > 65536 threshold.
28872        let chunks = build_chunks(8, 1000, 12, |col| match col % 4 {
28873            0 => usize::MAX, // never float: stays Int64
28874            1 => 0,          // float from the first chunk
28875            2 => 3,          // promotes mid-stream
28876            _ => 7,          // promotes at the last chunk
28877        });
28878        assert_merge_matches_reference(chunks, 12);
28879    }
28880
28881    /// Small merge (below the parallel threshold) takes the sequential path
28882    /// and must also match.
28883    #[test]
28884    fn sequential_merge_matches_reference() {
28885        let chunks = build_chunks(3, 4, 5, |col| if col % 2 == 0 { usize::MAX } else { 1 });
28886        assert_merge_matches_reference(chunks, 5);
28887    }
28888
28889    /// More worker groups than columns (header_count < MAX_WORKERS) and
28890    /// header_count not divisible by the group size must keep column order.
28891    #[test]
28892    fn parallel_merge_handles_narrow_and_ragged_widths() {
28893        for header_count in [2, 3, 7, 9, 17] {
28894            let chunks = build_chunks(5, 2000, header_count, |col| col % 6);
28895            assert_merge_matches_reference(chunks, header_count);
28896        }
28897    }
28898
28899    /// A chunk whose column count disagrees with the header must reject, as
28900    /// before.
28901    #[test]
28902    fn merge_rejects_mismatched_chunk_width() {
28903        let mut chunks = build_chunks(2, 10, 4, |_| usize::MAX);
28904        chunks[1].0.pop();
28905        assert!(merge_simple_numeric_csv_chunks(chunks, 4).is_none());
28906    }
28907}
fp_io/lib.rs

fp_io/
lib.rs