Skip to main content

df_derive_core/
lib.rs

1//! Shared runtime trait identity for `df-derive`.
2//!
3//! # What this crate provides
4//!
5//! `df-derive-core` owns the default `dataframe` traits used by the
6//! user-facing `df-derive` facade. Sharing these traits across crates lets
7//! derived models compose as nested `ToDataFrame` types without each crate
8//! inventing a local runtime identity.
9//!
10//! The [`dataframe`] module exposes:
11//!
12//! - [`dataframe::ToDataFrame`] — the per-instance API the derive populates.
13//! - [`dataframe::Columnar`] — the columnar batch API the derive populates.
14//! - [`dataframe::ToDataFrameVec`] — the slice extension trait that routes
15//!   `[T]::to_dataframe()` through `Columnar` or `empty_dataframe`.
16//! - [`dataframe::Decimal128Encode`] — the contract for encoding a decimal
17//!   value as an `i128` mantissa rescaled to a target scale. The reference
18//!   `rust_decimal::Decimal` impl is gated behind the `rust_decimal`
19//!   feature (enabled by default).
20//! - `impl ToDataFrame for ()` and `impl Columnar for ()` — the zero-column
21//!   payload behavior used by generic `Wrapper<()>` shapes.
22//!
23//! # When to use this crate
24//!
25//! Most users get this crate through `df-derive`. Depend on this crate
26//! directly when you want the shared traits without the facade, or when you
27//! use `df-derive-macros` directly and still want the default runtime
28//! identity.
29//!
30//! ```toml
31//! [dependencies]
32//! df-derive-core = "0.3"
33//! df-derive-macros = "0.3"
34//! polars = "0.53"
35//! ```
36//!
37//! Default-runtime generated code uses hidden dependency re-exports from this
38//! crate, so direct `polars-arrow` dependencies are not required unless you
39//! use a custom runtime.
40//!
41//! ```ignore
42//! use df_derive_core::dataframe::{ToDataFrame as _, ToDataFrameVec as _};
43//! use df_derive_macros::ToDataFrame;
44//!
45//! #[derive(ToDataFrame)]
46//! struct Trade { symbol: String, price: f64, size: u64 }
47//! ```
48//!
49//! # Validating a custom decimal backend
50//!
51//! The `Decimal128Encode` contract requires round-half-to-even (banker's
52//! rounding) on scale-down. The reference `rust_decimal::Decimal` impl in
53//! this crate honours that contract and is checked against Polars' decimal
54//! string-cast behavior in this crate's integration tests.
55
56// `polars` pulls a wide transitive dependency tree (ahash, foldhash,
57// hashbrown, windows-sys variants, …) where multiple resolved versions are
58// unavoidable. `clippy::multiple_crate_versions` is part of the
59// `clippy::cargo` group `just lint` enables, and it would fire ~21 times on
60// dependencies entirely outside this crate's control. Allow it here so the
61// lint surface stays focused on this crate's own code.
62#![allow(clippy::multiple_crate_versions)]
63
64pub mod dataframe {
65    use polars::prelude::{AnyValue, DataFrame, DataType, PolarsResult, Series};
66
    /// Largest target scale the `rust_decimal` encoder in this module will
    /// accept; any larger `target_scale` makes it return `None`. Only compiled
    /// when the `rust_decimal` feature is enabled.
    #[cfg(feature = "rust_decimal")]
    const DECIMAL128_MAX_SCALE: u32 = 38;
69
    /// Hidden dependency re-exports used by generated code for the default
    /// dataframe runtime. This is not part of the public API surface.
    #[doc(hidden)]
    pub mod __private {
        // Re-exported so default-runtime generated code can name these crates
        // through this crate instead of requiring a direct dependency in the
        // downstream `Cargo.toml`.
        pub use polars;
        pub use polars_arrow;
    }
77
    /// Per-instance dataframe API populated by the derive macro.
    ///
    /// Besides converting a single value, the trait exposes two associated
    /// functions that need no instance: one for the empty frame and one for
    /// the schema.
    pub trait ToDataFrame {
        /// Converts this value into a `DataFrame`.
        ///
        /// # Errors
        /// Returns an error if `DataFrame` construction fails.
        fn to_dataframe(&self) -> PolarsResult<DataFrame>;
        /// Builds the `DataFrame` used when there are no values to convert;
        /// the slice extension in this module returns it for empty slices.
        ///
        /// # Errors
        /// Returns an error if `DataFrame` construction fails.
        fn empty_dataframe() -> PolarsResult<DataFrame>;
        /// Returns the schema as `(String, DataType)` pairs — presumably one
        /// `(column name, dtype)` entry per column; confirm against the derive
        /// output.
        ///
        /// # Errors
        /// Returns an error if schema generation fails.
        fn schema() -> PolarsResult<Vec<(String, DataType)>>;
    }
89
90    /// Columnar batch trait implemented by the derive macro.
91    pub trait Columnar: Sized {
92        /// # Errors
93        /// Returns an error if `DataFrame` construction fails.
94        fn columnar_to_dataframe(items: &[Self]) -> PolarsResult<DataFrame> {
95            let refs: Vec<&Self> = items.iter().collect();
96            Self::columnar_from_refs(&refs)
97        }
98        /// # Errors
99        /// Returns an error if `DataFrame` construction fails.
100        fn columnar_from_refs(items: &[&Self]) -> PolarsResult<DataFrame>;
101    }
102
103    /// Extension trait enabling `.to_dataframe()` on slices (and `Vec` via auto-deref)
104    pub trait ToDataFrameVec {
105        /// # Errors
106        /// Returns an error if `DataFrame` construction fails.
107        fn to_dataframe(&self) -> PolarsResult<DataFrame>;
108    }
109
110    impl<T> ToDataFrameVec for [T]
111    where
112        T: Columnar + ToDataFrame,
113    {
114        fn to_dataframe(&self) -> PolarsResult<DataFrame> {
115            if self.is_empty() {
116                return <T as ToDataFrame>::empty_dataframe();
117            }
118            <T as Columnar>::columnar_to_dataframe(self)
119        }
120    }
121
122    fn zero_column_dataframe_with_height(n: usize) -> PolarsResult<DataFrame> {
123        let dummy = Series::new_empty("_dummy".into(), &DataType::Null)
124            .extend_constant(AnyValue::Null, n)?;
125        let mut df = DataFrame::new_infer_height(vec![dummy.into()])?;
126        df.drop_in_place("_dummy")?;
127        Ok(df)
128    }
129
130    // Unit-type support for generic payloads such as `Wrapper<()>`. Direct
131    // derived fields of type `()` are rejected by df-derive, but a generic
132    // field instantiated as `()` contributes zero columns. The
133    // `to_dataframe` / `columnar_to_dataframe` paths must still produce a
134    // DataFrame with the correct row count, so we use a temporary dummy
135    // column that is dropped immediately after construction.
136    impl ToDataFrame for () {
137        fn to_dataframe(&self) -> PolarsResult<DataFrame> {
138            zero_column_dataframe_with_height(1)
139        }
140
141        fn empty_dataframe() -> PolarsResult<DataFrame> {
142            DataFrame::new_infer_height(vec![])
143        }
144
145        fn schema() -> PolarsResult<Vec<(String, DataType)>> {
146            Ok(Vec::new())
147        }
148    }
149
150    impl Columnar for () {
151        fn columnar_to_dataframe(items: &[Self]) -> PolarsResult<DataFrame> {
152            zero_column_dataframe_with_height(items.len())
153        }
154
155        fn columnar_from_refs(items: &[&Self]) -> PolarsResult<DataFrame> {
156            zero_column_dataframe_with_height(items.len())
157        }
158    }
159
160    /// Plug-in trait for converting a decimal value into its `i128`
161    /// mantissa rescaled to a target scale.
162    ///
163    /// Implementers MUST use round-half-to-even (banker's rounding) on
164    /// scale-down so the bytes the derive emits match polars's own
165    /// `str_to_dec128` path. A `None` return surfaces as a polars
166    /// `ComputeError` from the generated code.
167    ///
168    /// The codegen invokes this method through UFCS on the selected trait
169    /// path, so inherent methods with the same name cannot bypass this
170    /// contract. Custom backends (`bigdecimal::BigDecimal`,
171    /// arbitrary-precision types, …) provide their own impls; this crate
172    /// ships a `rust_decimal::Decimal` impl below.
173    pub trait Decimal128Encode {
174        /// Returns the mantissa as `i128` after rescaling `self` to
175        /// `target_scale`, or `None` if the conversion would overflow or
176        /// otherwise violate the schema. Implementations MUST round
177        /// half-to-even on scale-down.
178        fn try_to_i128_mantissa(&self, target_scale: u32) -> Option<i128>;
179    }
180
181    impl<T> Decimal128Encode for &T
182    where
183        T: Decimal128Encode + ?Sized,
184    {
185        #[inline]
186        fn try_to_i128_mantissa(&self, target_scale: u32) -> Option<i128> {
187            <T as Decimal128Encode>::try_to_i128_mantissa(*self, target_scale)
188        }
189    }
190
191    /// Reference [`Decimal128Encode`] impl for [`rust_decimal::Decimal`].
192    ///
193    /// Banker's-rounding contract: round-half-to-even on scale-down,
194    /// `checked_mul` overflow-to-`None` on scale-up. This impl is verified
195    /// against polars's `str_to_dec128` on a battery of inputs covering
196    /// half-tie boundaries (positive and negative), large magnitudes, and
197    /// scale-up overflow by this repository's `df-derive-core` integration
198    /// tests.
199    #[cfg(feature = "rust_decimal")]
200    impl Decimal128Encode for rust_decimal::Decimal {
201        #[inline]
202        fn try_to_i128_mantissa(&self, target_scale: u32) -> Option<i128> {
203            // Bounds: `rust_decimal::Decimal::scale()` is capped at 28,
204            // polars caps decimal scale at `DECIMAL128_MAX_SCALE`, so the
205            // scale-up `diff` is at most 38 and the scale-down `diff` is at
206            // most 28.
207            // `10i128.pow(diff)` therefore fits in i128 for either direction
208            // (max `10^38 < 2^127`).
209            if target_scale > DECIMAL128_MAX_SCALE {
210                return None;
211            }
212
213            let source_scale = self.scale();
214            let mantissa: i128 = self.mantissa();
215            if source_scale == target_scale {
216                return Some(mantissa);
217            }
218            if source_scale < target_scale {
219                let diff = target_scale - source_scale;
220                let pow = 10i128.pow(diff);
221                return mantissa.checked_mul(pow);
222            }
223            // Scale-down with round-half-to-even on the unsigned magnitude,
224            // then re-apply sign — matches polars's `div_128_pow10`
225            // semantics. The `(abs / pow)` quotient cannot exceed `i128::MAX`
226            // because `abs <= i128::MAX as u128` and `pow >= 1`, so the
227            // `cast_signed` is value-preserving.
228            let diff = source_scale - target_scale;
229            let pow = 10i128.pow(diff).cast_unsigned();
230            let neg = mantissa < 0;
231            let abs = mantissa.unsigned_abs();
232            let q = (abs / pow).cast_signed();
233            let r = abs % pow;
234            let half = pow / 2;
235            let rounded = match r.cmp(&half) {
236                ::std::cmp::Ordering::Greater => q + 1,
237                ::std::cmp::Ordering::Less => q,
238                ::std::cmp::Ordering::Equal => q + (q & 1),
239            };
240            Some(if neg { -rounded } else { rounded })
241        }
242    }
243}