df_derive/lib.rs
1//! df-derive – derive fast conversions from your Rust types to Polars `DataFrame`
2//!
3//! ## What this crate does
4//!
5//! Deriving `ToDataFrame` on your structs and tuple structs generates fast, allocation-conscious
6//! code to:
7//!
8//! - Convert a single value to a `polars::prelude::DataFrame`
9//! - Convert a slice of values via a columnar path (efficient batch conversion)
//! - Inspect the schema (column names and `DataType`s) via a generated `schema()` associated function
11//!
12//! It supports nested structs (flattened with dot notation), `Option<T>`, `Vec<T>`, tuple structs,
13//! and key domain types like `chrono::DateTime<Utc>` and `rust_decimal::Decimal`.
14//!
15//! ## Installation
16//!
17//! Add the macro crate and Polars. You will also need a trait defining the `to_dataframe` behavior
18//! (you can use your own runtime crate/traits; see the override section below). For a minimal inline
19//! trait you can copy, see the Quick start example.
20//!
21//! ```toml
22//! [dependencies]
23//! df-derive = "0.2.0"
24//! polars = { version = "0.50", features = ["timezones", "dtype-decimal"] }
25//!
26//! # If you use these types in your models
27//! chrono = { version = "0.4", features = ["serde"] }
28//! rust_decimal = { version = "1.36", features = ["serde"] }
29//! ```
30//!
31//! ## Quick start
32//!
33//! Copy-paste runnable example without any external runtime traits. This is a complete working
34//! example that you can run with `cargo run --example quickstart`. In your own project, place the
35//! `dataframe` traits wherever you like and point the derive macro to them (see
36//! "Crate path override").
37//!
38//! ```rust
39//! use df_derive::ToDataFrame;
40//!
41//! mod dataframe {
42//! use polars::prelude::{DataFrame, DataType, PolarsResult};
43//!
44//! pub trait ToDataFrame {
45//! fn to_dataframe(&self) -> PolarsResult<DataFrame>;
46//! fn empty_dataframe() -> PolarsResult<DataFrame>;
47//! fn schema() -> PolarsResult<Vec<(&'static str, DataType)>>;
48//! }
49//!
50//! pub trait Columnar: Sized {
51//! fn columnar_to_dataframe(items: &[Self]) -> PolarsResult<DataFrame>;
52//! }
53//! }
54//!
55//! #[derive(ToDataFrame)]
56//! #[df_derive(trait = "crate::dataframe::ToDataFrame")] // Columnar path auto-infers
57//! struct Trade { symbol: String, price: f64, size: u64 }
58//!
59//! fn main() -> polars::prelude::PolarsResult<()> {
60//! let t = Trade { symbol: "AAPL".into(), price: 187.23, size: 100 };
61//! let df_single = <Trade as crate::dataframe::ToDataFrame>::to_dataframe(&t)?;
62//! println!("{}", df_single);
63//! Ok(())
64//! }
65//! ```
66//!
67//! ## Features
68//!
69//! - **Nested structs (flattening)**: fields of nested structs appear as `outer.inner` columns
70//! - **Vec of primitives and structs**: becomes Polars `List` columns; `Vec<Nested>` becomes
71//! multiple `outer.subfield` list columns
72//! - **`Option<T>`**: null-aware materialization for both scalars and lists
73//! - **Tuple structs**: supported; columns are named `field_0`, `field_1`, ...
74//! - **Empty structs**: produce `(1, 0)` for instances and `(0, 0)` for empty frames
//! - **Schema discovery**: `T::schema() -> PolarsResult<Vec<(&'static str, DataType)>>`
76//! - **Columnar batch conversion**: `[T]` via a generated `Columnar` implementation
77//!
78//! ### Attribute helpers
79//!
80//! Use `#[df_derive(as_string)]` to stringify values during conversion. This is particularly useful
81//! for enums:
82//!
83//! ```rust
84//! use df_derive::ToDataFrame;
85//!
86//! // Minimal runtime traits used by the derive macro
87//! mod dataframe {
88//! use polars::prelude::{DataFrame, DataType, PolarsResult};
89//! pub trait ToDataFrame {
90//! fn to_dataframe(&self) -> PolarsResult<DataFrame>;
91//! fn empty_dataframe() -> PolarsResult<DataFrame>;
92//! fn schema() -> PolarsResult<Vec<(&'static str, DataType)>>;
93//! }
94//! pub trait Columnar: Sized {
95//! fn columnar_to_dataframe(items: &[Self]) -> PolarsResult<DataFrame>;
96//! }
97//! }
98//!
99//! #[derive(Clone, Debug, PartialEq)]
100//! enum Status { Active, Inactive }
101//!
102//! impl std::fmt::Display for Status {
103//! fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
104//! match self {
105//! Status::Active => write!(f, "Active"),
106//! Status::Inactive => write!(f, "Inactive"),
107//! }
108//! }
109//! }
110//!
111//! #[derive(ToDataFrame)]
112//! #[df_derive(trait = "crate::dataframe::ToDataFrame")]
113//! struct WithEnums {
114//! #[df_derive(as_string)]
115//! status: Status,
116//! #[df_derive(as_string)]
117//! opt_status: Option<Status>,
118//! #[df_derive(as_string)]
119//! statuses: Vec<Status>,
120//! }
121//!
122//! fn main() {}
123//! ```
124//!
125//! Columns will use `DataType::String` (or `List<String>` for `Vec<_>`), and values are produced via
126//! `ToString`.
127//!
128//! ## Supported types
129//!
130//! - **Primitives**: `String`, `bool`, integer types (`i8/i16/i32/i64/isize`, `u8/u16/u32/u64/usize`),
131//! `f32`, `f64`
132//! - **Time**: `chrono::DateTime<Utc>` → materialized as `Datetime(Milliseconds, None)`
133//! - **Decimal**: `rust_decimal::Decimal` → `Decimal(38, 10)`
134//! - **Wrappers**: `Option<T>`, `Vec<T>` in any nesting order
135//! - **Custom structs**: any other struct deriving `ToDataFrame` (supports nesting and `Vec<Nested>`,
136//! yielding prefixed list columns)
137//! - **Tuple structs**: unnamed fields are emitted as `field_{index}`
138//!
139//! ## Column naming
140//!
141//! - Named struct fields: `field_name`
142//! - Nested structs: `outer.inner` (recursively)
143//! - Vec of custom structs: `vec_field.subfield` (list dtype)
144//! - Tuple structs: `field_0`, `field_1`, ...
145//!
146//! ## Generated API
147//!
148//! For every `#[derive(ToDataFrame)]` type `T` the macro generates implementations of two traits
149//! (paths configurable via `#[df_derive(...)]`):
150//!
151//! - `ToDataFrame` for `T`:
152//! - `fn to_dataframe(&self) -> PolarsResult<DataFrame>`
153//! - `fn empty_dataframe() -> PolarsResult<DataFrame>`
154//! - `fn schema() -> PolarsResult<Vec<(&'static str, DataType)>>`
155//! - `Columnar` for `T`:
156//! - `fn columnar_to_dataframe(items: &[Self]) -> PolarsResult<DataFrame>`
157//!
158//! Empty-struct behavior:
159//!
160//! - `to_dataframe(&self)` produces a single-row `DataFrame` with zero columns
161//! - `empty_dataframe()` produces a `(0, 0)` `DataFrame`
162//! - `columnar_to_dataframe(&[T])` produces a zero-column `DataFrame` with `items.len()` rows
163//!
164//! ## Examples
165//!
166//! This crate includes several runnable examples in the `examples/` directory:
167//!
168//! - `quickstart` — Basic usage with single and batch `DataFrame` conversion
169//! - `nested` — Nested structs with dot notation column naming
170//! - `vec_custom` — Vec of custom structs creating List columns
171//! - `tuple` — Tuple structs with `field_0`, `field_1` naming
172//! - `datetime_decimal` — `DateTime` and `Decimal` type support
173//! - `as_string` — `#[df_derive(as_string)]` attribute for enum conversion
174//!
175//! ## Limitations and guidance
176//!
177//! - **Unsupported container types**: maps/sets like `HashMap<_, _>` are not supported
178//! - **Enums**: derive on enums is not supported; use `#[df_derive(as_string)]` on enum fields
179//! - **Generics**: generic structs are not supported by the derive (see `tests/fail`)
180//! - **All nested types must also derive**: if you nest a struct, it must also derive `ToDataFrame`
181//!
182//! ## Performance notes
183//!
184//! The derive implements an internal `Columnar` path used by runtimes to convert slices efficiently,
185//! avoiding per-row `DataFrame` builds. Criterion benches in `benches/` exercise wide, deep, and
186//! nested-Vec shapes (100k+ rows), demonstrating consistent performance across shapes.
187//!
188//! ## Compatibility
189//!
190//! - **Rust edition**: 2024
191//! - **Polars**: 0.50 (tested). Enable Polars features `timezones` and `dtype-decimal` if you use
192//! `DateTime<Utc>` or `Decimal`.
193//!
194//! ## License
195//!
196//! MIT. See `LICENSE`.
197//!
198//! ## Crate path override (about paft)
199//!
//! By default, the macro resolves trait paths to a `dataframe` module under the `paft` ecosystem.
//! Concretely, it attempts to implement `paft::dataframe::ToDataFrame` and
//! `paft::dataframe::Columnar` (or `paft_core::dataframe::...`, the path form of the `paft-core`
//! crate) if those crates are present. You can override these paths for any runtime by annotating
//! your type with `#[df_derive(...)]`:
204//!
205//! ```rust
206//! use df_derive::ToDataFrame;
207//!
208//! // Define a local runtime with the expected traits
209//! mod my_runtime { pub mod dataframe {
210//! use polars::prelude::{DataFrame, DataType, PolarsResult};
211//! pub trait ToDataFrame {
212//! fn to_dataframe(&self) -> PolarsResult<DataFrame>;
213//! fn empty_dataframe() -> PolarsResult<DataFrame>;
214//! fn schema() -> PolarsResult<Vec<(&'static str, DataType)>>;
215//! }
216//! pub trait Columnar: Sized {
217//! fn columnar_to_dataframe(items: &[Self]) -> PolarsResult<DataFrame>;
218//! }
219//! }}
220//!
221//! #[derive(ToDataFrame)]
222//! #[df_derive(trait = "my_runtime::dataframe::ToDataFrame")] // Columnar inferred
223//! struct MyType {}
224//!
225//! fn main() {}
226//! ```
227//!
228//! If you need to override both explicitly:
229//!
230//! ```rust
231//! use df_derive::ToDataFrame;
232//!
233//! // Define a local runtime with the expected traits
234//! mod my_runtime { pub mod dataframe {
235//! use polars::prelude::{DataFrame, DataType, PolarsResult};
236//! pub trait ToDataFrame {
237//! fn to_dataframe(&self) -> PolarsResult<DataFrame>;
238//! fn empty_dataframe() -> PolarsResult<DataFrame>;
239//! fn schema() -> PolarsResult<Vec<(&'static str, DataType)>>;
240//! }
241//! pub trait Columnar: Sized {
242//! fn columnar_to_dataframe(items: &[Self]) -> PolarsResult<DataFrame>;
243//! }
244//! }}
245//!
246//! #[derive(ToDataFrame)]
247//! #[df_derive(
248//! trait = "my_runtime::dataframe::ToDataFrame",
249//! columnar = "my_runtime::dataframe::Columnar",
250//! )]
251//! struct MyType {}
252//!
253//! fn main() {}
254//! ```
255#![warn(missing_docs)]
256extern crate proc_macro;
257
258mod codegen;
259mod ir;
260mod parser;
261mod type_analysis;
262use proc_macro::TokenStream;
263use quote::quote;
264use syn::{DeriveInput, parse_macro_input};
265
266/// Derive `ToDataFrame` for structs and tuple structs to generate fast conversions to Polars.
267///
268/// What this macro generates (paths configurable via `#[df_derive(...)]`):
269///
270/// - An implementation of `ToDataFrame` for the annotated type `T` providing:
271/// - `fn to_dataframe(&self) -> PolarsResult<DataFrame>`
272/// - `fn empty_dataframe() -> PolarsResult<DataFrame>`
273/// - `fn schema() -> PolarsResult<Vec<(&'static str, DataType)>>`
274/// - An implementation of `Columnar` for `T` providing
275/// `fn columnar_to_dataframe(items: &[Self]) -> PolarsResult<DataFrame>`
276///
277/// Supported shapes and types:
278///
279/// - Named and tuple structs (tuple fields are named `field_{index}`)
280/// - Nested structs are flattened using dot notation (e.g., `outer.inner`)
281/// - Wrappers `Option<T>` and `Vec<T>` in any nesting order, with `Vec<Struct>` producing multiple
282/// list columns with a `vec_field.subfield` prefix
283/// - Primitive types: `String`, `bool`, integer types, `f32`, `f64`
284/// - `chrono::DateTime<Utc>` (materialized as `Datetime(Milliseconds, None)`)
285/// - `rust_decimal::Decimal` (materialized as `Decimal(38, 10)`)
286///
287/// Attributes:
288///
289/// - Container-level: `#[df_derive(trait = "path::ToDataFrame")]` to set the `ToDataFrame` trait
290/// path; the `Columnar` path is inferred by replacing the last path segment with `Columnar`.
291/// Optionally, set both explicitly with
292/// `#[df_derive(columnar = "path::Columnar")]`.
293/// - Field-level: `#[df_derive(as_string)]` to stringify values (e.g., enums) during conversion,
294/// resulting in `DataType::String` or `List<String>`.
295///
296/// Notes:
297///
298/// - Enums and generic structs are not supported for derive.
299/// - All nested custom structs must also derive `ToDataFrame`.
300/// - Empty structs: `to_dataframe` yields a single-row, zero-column `DataFrame`; the columnar path
301/// yields a zero-column `DataFrame` with `items.len()` rows.
302#[proc_macro_derive(ToDataFrame, attributes(df_derive))]
303pub fn to_dataframe_derive(input: TokenStream) -> TokenStream {
304 // Parse the input tokens into a syntax tree
305 let ast = parse_macro_input!(input as DeriveInput);
306 // Parse helper attribute configuration (trait paths)
307 let default_df_mod = codegen::resolve_paft_crate_path();
308 let mut to_df_trait_path_ts = quote! { #default_df_mod::ToDataFrame };
309 let mut columnar_trait_path_ts = quote! { #default_df_mod::Columnar };
310
311 for attr in &ast.attrs {
312 if attr.path().is_ident("df_derive") {
313 let parse_res = attr.parse_nested_meta(|meta| {
314 if meta.path.is_ident("trait") {
315 let lit: syn::LitStr = meta.value()?.parse()?;
316 let path: syn::Path = syn::parse_str(&lit.value())
317 .map_err(|e| meta.error(format!("invalid trait path: {e}")))?;
318 to_df_trait_path_ts = quote! { #path };
319
320 // Automatically infer the Columnar trait path by replacing the final segment
321 let mut columnar_path = path;
322 if let Some(last_segment) = columnar_path.segments.last_mut() {
323 last_segment.ident = syn::Ident::new("Columnar", last_segment.ident.span());
324 }
325 columnar_trait_path_ts = quote! { #columnar_path };
326 Ok(())
327 } else if meta.path.is_ident("columnar") {
328 let lit: syn::LitStr = meta.value()?.parse()?;
329 let path: syn::Path = syn::parse_str(&lit.value())
330 .map_err(|e| meta.error(format!("invalid columnar trait path: {e}")))?;
331 columnar_trait_path_ts = quote! { #path };
332 Ok(())
333 } else {
334 Err(meta.error("unsupported key in #[df_derive(...)] attribute"))
335 }
336 });
337 if let Err(err) = parse_res {
338 return err.to_compile_error().into();
339 }
340 }
341 }
342 let config = codegen::MacroConfig {
343 to_dataframe_trait_path: to_df_trait_path_ts,
344 columnar_trait_path: columnar_trait_path_ts,
345 };
346 // Build the intermediate representation
347 let ir = match parser::parse_to_ir(&ast) {
348 Ok(ir) => ir,
349 Err(e) => return e.to_compile_error().into(),
350 };
351
352 // Delegate to the codegen orchestrator
353 let generated = codegen::generate_code(&ir, &config);
354 TokenStream::from(generated)
355}