dbcli 0.1.0

Convert SQL query results to JSON without struct mapping, supporting MySQL/PostgreSQL/SQLite/Odbc
Documentation
//! # ODBC → JSON Conversion Module
//!
//! Provides functionality to convert an ODBC [`odbc_api::Cursor`] result set into
//! [`serde_json::Value`]. The [`to_json`] function processes an entire query result
//! set and returns both a JSON array and column metadata.
//!
//! Unlike the `sqlx`-based drivers, ODBC is a synchronous API and does not use
//! row type structs. Each row is iterated directly via [`odbc_api::Cursor::next_row`]
//! and columns are read by index.
//!
//! Type mappings follow the [`odbc_api::DataType`] enum variants:
//!
//! | ODBC DataType | JSON output | ColumnBaseInfo.type |
//! |---|---|---|
//! | Integer / SmallInt / TinyInt / BigInt | Number (i64) | `"INTEGER"` |
//! | Float / Double / Real | Number (f64) | `"FLOAT"` |
//! | Decimal / Numeric | String (precision preserved) | `"DECIMAL"` |
//! | Bit | Bool (Access: -1/1 = true, 0 = false) | `"BIT"` |
//! | Char / Varchar / LongVarchar / WChar / WVarchar / WLongVarchar | String (auto-encoding) | `"TEXT"` |
//! | Date | String | `"DATE"` |
//! | Time | String | `"TIME"` |
//! | Timestamp | String | `"TIMESTAMP"` |
//! | Binary / Varbinary / LongVarbinary | String (Base64) | `"BINARY"` |
//! | Other | String (get_text fallback) | `"TEXT"` |
//!
//! # Example
//!
//! ```rust,no_run
//! use odbc_api::{Environment, ConnectionOptions};
//!
//! fn example() -> anyhow::Result<()> {
//!     let env = Environment::new()?;
//!     let conn = env.connect_with_connection_string(
//!         "Driver={...};DBQ=path/to/file.mdb;",
//!         ConnectionOptions::default(),
//!     )?;
//!     if let Some(mut cursor) = conn.execute("SELECT * FROM MyTable", (), None)? {
//!         let (data, columns) = dbcli::to_json::odbc::to_json(&mut cursor)?;
//!         println!("{}", serde_json::to_string_pretty(&data)?);
//!     }
//!     Ok(())
//! }
//! ```

use base64::{engine::general_purpose, Engine};
use odbc_api::{ColumnDescription, Cursor, CursorRow, DataType, Nullable};
use serde_json::Value as JsonValue;

use crate::column_info::ColumnBaseInfo;
use crate::decode::decode_auto;
use super::f64_to_json_safe;

/// Function pointer type for parsing a single ODBC cell.
/// `col_index` is 1-based (ODBC convention).
type OdbcCellParser = for<'a> fn(row: &mut CursorRow<'a>, col_index: u16) -> anyhow::Result<JsonValue>;

/// ODBC row parse result, containing per-column parse function pointers and column metadata.
pub struct OdbcRowParse {
    pub methods: Vec<OdbcCellParser>,
    pub columns: Vec<ColumnBaseInfo>,
}

/// Determines parsing methods for each column based on column descriptions.
/// Called once before row iteration.
fn determine_parsing_methods(
    column_names: &[String],
    column_descriptions: &[ColumnDescription],
) -> OdbcRowParse {
    let mut methods = Vec::with_capacity(column_descriptions.len());
    let mut columns = Vec::with_capacity(column_descriptions.len());

    for (i, desc) in column_descriptions.iter().enumerate() {
        let type_str = data_type_to_str(&desc.data_type);
        let method: OdbcCellParser = match type_str {
            "INTEGER" => parse_integer,
            "FLOAT" => parse_float,
            "DECIMAL" => parse_decimal,
            "BIT" => parse_bit,
            "BINARY" => parse_binary,
            // TEXT, DATE, TIME, TIMESTAMP, and everything else
            _ => parse_text,
        };
        methods.push(method);
        columns.push(ColumnBaseInfo {
            name: column_names[i].clone(),
            r#type: type_str.to_string(),
            index: i as u64,
        });
    }

    OdbcRowParse { methods, columns }
}

/// Normalize column names for ODBC result sets.
///
/// Handles two edge cases common with Excel and other non-database ODBC sources:
/// - **Empty names**: replaced with `"col_1"`, `"col_2"`, etc. (1-based index)
/// - **Duplicate names**: disambiguated by appending `"_2"`, `"_3"`, etc.
fn normalize_column_names(raw_names: Vec<String>) -> Vec<String> {
    let mut result = Vec::with_capacity(raw_names.len());
    let mut seen = std::collections::HashMap::<String, usize>::new();

    for (i, name) in raw_names.into_iter().enumerate() {
        // Replace empty names with positional placeholder.
        let base = if name.trim().is_empty() {
            format!("col_{}", i + 1)
        } else {
            name
        };

        // Deduplicate: track occurrence count and append suffix if needed.
        let count = seen.entry(base.clone()).or_insert(0);
        *count += 1;
        let final_name = if *count == 1 {
            base
        } else {
            format!("{}_{}", base, count)
        };

        result.push(final_name);
    }

    result
}

/// Converts an ODBC cursor result set into a JSON array.
///
/// Reads all rows from the given `cursor` and converts each row to a JSON object,
/// keyed by column name. Also returns column metadata.
///
/// Returns `(vec![], vec![])` immediately when the cursor contains no columns.
///
/// # Arguments
///
/// * `cursor` - A mutable reference to an open ODBC cursor positioned before the first row.
///
/// # Returns
///
/// A tuple of:
/// - `Vec<serde_json::Value>`: one JSON object per row
/// - `Vec<ColumnBaseInfo>`: column metadata (name, type string, index)
///
/// # Errors
///
/// Returns an error if ODBC API calls fail (e.g., column description, row iteration,
/// or cell data extraction).
pub fn to_json(
    cursor: &mut impl Cursor,
) -> anyhow::Result<(Vec<JsonValue>, Vec<ColumnBaseInfo>)> {
    let cols = cursor.num_result_cols()?;
    if cols == 0 {
        return Ok((vec![], vec![]));
    }

    // Collect column names and normalize: fill empty names, deduplicate.
    let raw_names: Vec<String> = cursor
        .column_names()?
        .collect::<Result<Vec<_>, _>>()?;
    let column_names = normalize_column_names(raw_names);

    // Collect column descriptions to determine type mapping (1-based index in ODBC)
    let column_descriptions: Vec<ColumnDescription> = (1..=cols)
        .map(|col_index| {
            let mut description = ColumnDescription::default();
            cursor.describe_col(col_index as u16, &mut description)?;
            Ok(description)
        })
        .collect::<Result<_, odbc_api::Error>>()?;

    // Determine parsing methods once, then apply to all rows
    let parse = determine_parsing_methods(&column_names, &column_descriptions);
    let methods = &parse.methods;
    let columns = parse.columns;

    let mut data: Vec<JsonValue> = Vec::new();

    while let Some(mut row) = cursor.next_row()? {
        let mut map = serde_json::Map::with_capacity(cols as usize);
        for (i, method) in methods.iter().enumerate() {
            let col_index = i as u16 + 1; // ODBC is 1-based
            let value = method(&mut row, col_index)?;
            map.insert(columns[i].name.clone(), value);
        }
        data.push(JsonValue::Object(map));
    }

    Ok((data, columns))
}

/// Maps an ODBC [`DataType`] to the canonical uppercase type string used in [`ColumnBaseInfo`].
fn data_type_to_str(dt: &DataType) -> &'static str {
    match dt {
        DataType::Integer | DataType::SmallInt | DataType::TinyInt | DataType::BigInt => "INTEGER",
        DataType::Float { .. } | DataType::Double | DataType::Real => "FLOAT",
        DataType::Decimal { .. } | DataType::Numeric { .. } => "DECIMAL",
        DataType::Bit => "BIT",
        DataType::Char { .. }
        | DataType::Varchar { .. }
        | DataType::LongVarchar { .. }
        | DataType::WChar { .. }
        | DataType::WVarchar { .. }
        | DataType::WLongVarchar { .. } => "TEXT",
        DataType::Date => "DATE",
        DataType::Time { .. } => "TIME",
        DataType::Timestamp { .. } => "TIMESTAMP",
        DataType::Binary { .. } | DataType::Varbinary { .. } | DataType::LongVarbinary { .. } => {
            "BINARY"
        }
        _ => "TEXT",
    }
}

/// Parses an INTEGER cell (SmallInt / TinyInt / BigInt included).
///
/// Reads as text then parses to i64. NULL or empty yields [`JsonValue::Null`].
fn parse_integer(row: &mut CursorRow<'_>, col_index: u16) -> anyhow::Result<JsonValue> {
    let mut buf: Vec<u8> = Vec::new();
    let not_null = row.get_text(col_index, &mut buf)?;
    if !not_null || buf.is_empty() {
        return Ok(JsonValue::Null);
    }
    let s = decode_auto(&buf);
    if s.is_empty() {
        Ok(JsonValue::Null)
    } else if let Ok(n) = s.parse::<i64>() {
        Ok(JsonValue::Number(n.into()))
    } else {
        // Fallback: non-parseable but non-empty
        Ok(JsonValue::String(s))
    }
}

/// Parses a FLOAT cell (Double / Real included).
///
/// Uses [`Nullable<f64>`] to properly detect NULL, with a text-fallback for edge cases.
fn parse_float(row: &mut CursorRow<'_>, col_index: u16) -> anyhow::Result<JsonValue> {
    let mut target = Nullable::<f64>::null();
    row.get_data(col_index, &mut target)?;
    match target.into_opt() {
        Some(f) => Ok(f64_to_json_safe(f)),
        None => {
            // Fall back to text parsing (handles edge cases like NaN strings)
            let mut buf: Vec<u8> = Vec::new();
            let not_null = row.get_text(col_index, &mut buf)?;
            if !not_null || buf.is_empty() {
                Ok(JsonValue::Null)
            } else {
                let s = decode_auto(&buf);
                Ok(s.parse::<f64>()
                    .map(f64_to_json_safe)
                    .unwrap_or(JsonValue::Null))
            }
        }
    }
}

/// Parses a DECIMAL / NUMERIC cell.
///
/// Preserves precision by returning the value as a string to avoid float rounding.
fn parse_decimal(row: &mut CursorRow<'_>, col_index: u16) -> anyhow::Result<JsonValue> {
    let mut buf: Vec<u8> = Vec::new();
    let not_null = row.get_text(col_index, &mut buf)?;
    if !not_null || buf.is_empty() {
        return Ok(JsonValue::Null);
    }
    let s = decode_auto(&buf);
    if s.is_empty() {
        Ok(JsonValue::Null)
    } else {
        Ok(JsonValue::String(s))
    }
}

/// Parses a BIT cell.
///
/// MS Access stores Yes/No as -1/1 (true) and 0 (false).
fn parse_bit(row: &mut CursorRow<'_>, col_index: u16) -> anyhow::Result<JsonValue> {
    let mut buf: Vec<u8> = Vec::new();
    let not_null = row.get_text(col_index, &mut buf)?;
    if !not_null || buf.is_empty() {
        return Ok(JsonValue::Null);
    }
    let s = decode_auto(&buf);
    Ok(match s.as_str() {
        "1" | "-1" => JsonValue::Bool(true),
        "0" => JsonValue::Bool(false),
        other if !other.is_empty() => JsonValue::String(other.to_owned()),
        _ => JsonValue::Null,
    })
}

/// Parses a BINARY / VARBINARY / LONGVARBINARY cell.
///
/// Base64-encodes the raw bytes into a JSON string. NULL or empty yields [`JsonValue::Null`].
fn parse_binary(row: &mut CursorRow<'_>, col_index: u16) -> anyhow::Result<JsonValue> {
    let mut buf: Vec<u8> = Vec::new();
    let not_null = row.get_binary(col_index, &mut buf)?;
    if !not_null || buf.is_empty() {
        Ok(JsonValue::Null)
    } else {
        Ok(JsonValue::String(general_purpose::STANDARD.encode(&buf)))
    }
}

/// Parses a TEXT / DATE / TIME / TIMESTAMP / unknown cell.
///
/// Uses `get_text` with automatic encoding detection. NULL or empty yields [`JsonValue::Null`].
fn parse_text(row: &mut CursorRow<'_>, col_index: u16) -> anyhow::Result<JsonValue> {
    let mut buf: Vec<u8> = Vec::new();
    let not_null = row.get_text(col_index, &mut buf)?;
    if !not_null || buf.is_empty() {
        return Ok(JsonValue::Null);
    }
    let s = decode_auto(&buf);
    if s.is_empty() {
        Ok(JsonValue::Null)
    } else {
        Ok(JsonValue::String(s))
    }
}