df_derive_core/lib.rs
1//! Shared runtime trait identity for `df-derive`.
2//!
3//! # What this crate provides
4//!
5//! `df-derive-core` owns the default `dataframe` traits used by the
6//! user-facing `df-derive` facade. Sharing these traits across crates lets
7//! derived models compose as nested `ToDataFrame` types without each crate
8//! inventing a local runtime identity.
9//!
10//! The [`dataframe`] module exposes:
11//!
12//! - [`dataframe::ToDataFrame`] — the per-instance API the derive populates.
13//! - [`dataframe::Columnar`] — the columnar batch API the derive populates.
14//! - [`dataframe::ToDataFrameVec`] — the slice extension trait that routes
15//! `[T]::to_dataframe()` through `Columnar` or `empty_dataframe`.
16//! - [`dataframe::Decimal128Encode`] — the contract for encoding a decimal
17//! value as an `i128` mantissa rescaled to a target scale. The reference
18//! `rust_decimal::Decimal` impl is gated behind the `rust_decimal`
19//! feature (enabled by default).
20//! - `impl ToDataFrame for ()` and `impl Columnar for ()` — the zero-column
21//! payload behavior used by generic `Wrapper<()>` shapes.
22//!
23//! # When to use this crate
24//!
25//! Most users get this crate through `df-derive`. Depend on this crate
26//! directly when you want the shared traits without the facade, or when you
27//! use `df-derive-macros` directly and still want the default runtime
28//! identity.
29//!
30//! ```toml
31//! [dependencies]
32//! df-derive-core = "0.3"
33//! df-derive-macros = "0.3"
34//! polars = "0.53"
35//! ```
36//!
37//! Default-runtime generated code uses hidden dependency re-exports from this
38//! crate, so direct `polars-arrow` dependencies are not required unless you
39//! use a custom runtime.
40//!
41//! ```ignore
42//! use df_derive_core::dataframe::{ToDataFrame as _, ToDataFrameVec as _};
43//! use df_derive_macros::ToDataFrame;
44//!
45//! #[derive(ToDataFrame)]
46//! struct Trade { symbol: String, price: f64, size: u64 }
47//! ```
48//!
49//! # Validating a custom decimal backend
50//!
51//! The `Decimal128Encode` contract requires round-half-to-even (banker's
52//! rounding) on scale-down. The reference `rust_decimal::Decimal` impl in
53//! this crate honours that contract and is checked against Polars' decimal
54//! string-cast behavior in this crate's integration tests.
55
56// `polars` pulls a wide transitive dependency tree (ahash, foldhash,
57// hashbrown, windows-sys variants, …) where multiple resolved versions are
58// unavoidable. `clippy::multiple_crate_versions` is part of the
59// `clippy::cargo` group `just lint` enables, and it would fire ~21 times on
60// dependencies entirely outside this crate's control. Allow it here so the
61// lint surface stays focused on this crate's own code.
62#![allow(clippy::multiple_crate_versions)]
63
64pub mod dataframe {
65 use polars::prelude::{AnyValue, DataFrame, DataType, PolarsResult, Series};
66
/// Largest target scale the `rust_decimal` [`Decimal128Encode`] impl
/// accepts; polars caps decimal scale at this value, and `10^38` still
/// fits in an `i128`.
#[cfg(feature = "rust_decimal")]
const DECIMAL128_MAX_SCALE: u32 = 38;
69
70 /// Hidden dependency re-exports used by generated code for the default
71 /// dataframe runtime. This is not part of the public API surface.
72 #[doc(hidden)]
73 pub mod __private {
74 pub use polars;
75 pub use polars_arrow;
76 }
77
78 pub trait ToDataFrame {
79 /// # Errors
80 /// Returns an error if `DataFrame` construction fails.
81 fn to_dataframe(&self) -> PolarsResult<DataFrame>;
82 /// # Errors
83 /// Returns an error if `DataFrame` construction fails.
84 fn empty_dataframe() -> PolarsResult<DataFrame>;
85 /// # Errors
86 /// Returns an error if schema generation fails.
87 fn schema() -> PolarsResult<Vec<(String, DataType)>>;
88 }
89
90 /// Columnar batch trait implemented by the derive macro.
91 pub trait Columnar: Sized {
92 /// # Errors
93 /// Returns an error if `DataFrame` construction fails.
94 fn columnar_to_dataframe(items: &[Self]) -> PolarsResult<DataFrame> {
95 let refs: Vec<&Self> = items.iter().collect();
96 Self::columnar_from_refs(&refs)
97 }
98 /// # Errors
99 /// Returns an error if `DataFrame` construction fails.
100 fn columnar_from_refs(items: &[&Self]) -> PolarsResult<DataFrame>;
101 }
102
103 /// Extension trait enabling `.to_dataframe()` on slices (and `Vec` via auto-deref)
104 pub trait ToDataFrameVec {
105 /// # Errors
106 /// Returns an error if `DataFrame` construction fails.
107 fn to_dataframe(&self) -> PolarsResult<DataFrame>;
108 }
109
110 impl<T> ToDataFrameVec for [T]
111 where
112 T: Columnar + ToDataFrame,
113 {
114 fn to_dataframe(&self) -> PolarsResult<DataFrame> {
115 if self.is_empty() {
116 return <T as ToDataFrame>::empty_dataframe();
117 }
118 <T as Columnar>::columnar_to_dataframe(self)
119 }
120 }
121
122 fn zero_column_dataframe_with_height(n: usize) -> PolarsResult<DataFrame> {
123 let dummy = Series::new_empty("_dummy".into(), &DataType::Null)
124 .extend_constant(AnyValue::Null, n)?;
125 let mut df = DataFrame::new_infer_height(vec![dummy.into()])?;
126 df.drop_in_place("_dummy")?;
127 Ok(df)
128 }
129
130 // Unit-type support for generic payloads such as `Wrapper<()>`. Direct
131 // derived fields of type `()` are rejected by df-derive, but a generic
132 // field instantiated as `()` contributes zero columns. The
133 // `to_dataframe` / `columnar_to_dataframe` paths must still produce a
134 // DataFrame with the correct row count, so we use a temporary dummy
135 // column that is dropped immediately after construction.
136 impl ToDataFrame for () {
137 fn to_dataframe(&self) -> PolarsResult<DataFrame> {
138 zero_column_dataframe_with_height(1)
139 }
140
141 fn empty_dataframe() -> PolarsResult<DataFrame> {
142 DataFrame::new_infer_height(vec![])
143 }
144
145 fn schema() -> PolarsResult<Vec<(String, DataType)>> {
146 Ok(Vec::new())
147 }
148 }
149
150 impl Columnar for () {
151 fn columnar_to_dataframe(items: &[Self]) -> PolarsResult<DataFrame> {
152 zero_column_dataframe_with_height(items.len())
153 }
154
155 fn columnar_from_refs(items: &[&Self]) -> PolarsResult<DataFrame> {
156 zero_column_dataframe_with_height(items.len())
157 }
158 }
159
/// Plug-in contract for turning a decimal value into an `i128` mantissa
/// rescaled to a requested scale.
///
/// Implementations MUST round half-to-even (banker's rounding) whenever
/// the scale is reduced, so that the bytes emitted by the derive agree
/// with polars's own `str_to_dec128` path. Returning `None` surfaces as a
/// polars `ComputeError` from the generated code.
///
/// Generated code calls this method through UFCS on the selected trait
/// path, which means an inherent method of the same name cannot bypass
/// the contract. Custom backends (`bigdecimal::BigDecimal`,
/// arbitrary-precision types, …) bring their own impls; this crate ships
/// one for `rust_decimal::Decimal`.
pub trait Decimal128Encode {
    /// Rescales `self` to `target_scale` and returns the resulting
    /// mantissa, or `None` when the conversion would overflow or otherwise
    /// violate the schema. Scale-down MUST round half-to-even.
    fn try_to_i128_mantissa(&self, target_scale: u32) -> Option<i128>;
}

/// Forwarding impl: a shared reference encodes exactly like the value it
/// points at (including unsized targets).
impl<T: Decimal128Encode + ?Sized> Decimal128Encode for &T {
    #[inline]
    fn try_to_i128_mantissa(&self, target_scale: u32) -> Option<i128> {
        // `self` is `&&T`; peel one reference layer and delegate to `T`.
        let inner: &T = self;
        T::try_to_i128_mantissa(inner, target_scale)
    }
}
190
/// Reference [`Decimal128Encode`] impl for [`rust_decimal::Decimal`].
///
/// Behavior by case:
/// - target scale above `DECIMAL128_MAX_SCALE`: `None`;
/// - equal scales: the mantissa is returned unchanged;
/// - scale-up: the mantissa is multiplied by a power of ten with
///   `checked_mul`, so overflow becomes `None`;
/// - scale-down: the mantissa is divided by a power of ten with
///   round-half-to-even (banker's rounding).
///
/// This impl is verified against polars's `str_to_dec128` by this
/// repository's `df-derive-core` integration tests, on inputs covering
/// half-tie boundaries (positive and negative), large magnitudes, and
/// scale-up overflow.
#[cfg(feature = "rust_decimal")]
impl Decimal128Encode for rust_decimal::Decimal {
    #[inline]
    fn try_to_i128_mantissa(&self, target_scale: u32) -> Option<i128> {
        use ::std::cmp::Ordering;

        // Exponent bounds: `rust_decimal::Decimal::scale()` never exceeds
        // 28 and the guard below limits `target_scale` to
        // `DECIMAL128_MAX_SCALE`, so a scale-up exponent is at most 38 and
        // a scale-down exponent at most 28. `10i128.pow(..)` therefore
        // cannot overflow in either direction (`10^38 < 2^127`).
        if target_scale > DECIMAL128_MAX_SCALE {
            return None;
        }

        let current_scale = self.scale();
        let unscaled: i128 = self.mantissa();

        match current_scale.cmp(&target_scale) {
            Ordering::Equal => Some(unscaled),
            Ordering::Less => {
                let factor = 10i128.pow(target_scale - current_scale);
                unscaled.checked_mul(factor)
            }
            Ordering::Greater => {
                // Round half-to-even on the unsigned magnitude and put the
                // sign back afterwards, matching polars's `div_128_pow10`
                // semantics. The quotient cannot exceed `i128::MAX`
                // (`magnitude <= i128::MAX as u128`, `divisor >= 1`), so
                // `cast_signed` preserves its value.
                let divisor = 10i128.pow(current_scale - target_scale).cast_unsigned();
                let magnitude = unscaled.unsigned_abs();
                let quotient = (magnitude / divisor).cast_signed();
                let remainder = magnitude % divisor;
                let midpoint = divisor / 2;

                let round_up = match remainder.cmp(&midpoint) {
                    Ordering::Greater => true,
                    Ordering::Less => false,
                    // Exact tie: bump only an odd quotient, landing on even.
                    Ordering::Equal => quotient & 1 == 1,
                };
                let nearest = if round_up { quotient + 1 } else { quotient };

                Some(if unscaled < 0 { -nearest } else { nearest })
            }
        }
    }
}
243}