df_derive/
lib.rs

1//! df-derive – derive fast conversions from your Rust types to Polars `DataFrame`
2//!
3//! ## What this crate does
4//!
5//! Deriving `ToDataFrame` on your structs and tuple structs generates fast, allocation-conscious
6//! code to:
7//!
8//! - Convert a single value to a `polars::prelude::DataFrame`
9//! - Convert a slice of values via a columnar path (efficient batch conversion)
//! - Inspect the schema (column names and `DataType`s) without constructing a value, via a generated `schema()` associated function
11//!
12//! It supports nested structs (flattened with dot notation), `Option<T>`, `Vec<T>`, tuple structs,
13//! and key domain types like `chrono::DateTime<Utc>` and `rust_decimal::Decimal`.
14//!
15//! ## Installation
16//!
17//! Add the macro crate and Polars. You will also need a trait defining the `to_dataframe` behavior
18//! (you can use your own runtime crate/traits; see the override section below). For a minimal inline
19//! trait you can copy, see the Quick start example.
20//!
21//! ```toml
22//! [dependencies]
23//! df-derive = "0.1"
24//! polars = { version = "0.50", features = ["timezones", "dtype-decimal"] }
25//!
26//! # If you use these types in your models
27//! chrono = { version = "0.4", features = ["serde"] }
28//! rust_decimal = { version = "1.36", features = ["serde"] }
29//! ```
30//!
31//! ## Quick start
32//!
33//! Copy-paste runnable example without any external runtime traits. This is a complete working
34//! example that you can run with `cargo run --example quickstart`. In your own project, place the
35//! `dataframe` traits wherever you like and point the derive macro to them (see
36//! "Crate path override").
37//!
38//! ```rust
39//! use df_derive::ToDataFrame;
40//!
41//! mod dataframe {
42//!     use polars::prelude::{DataFrame, DataType, PolarsResult};
43//!
44//!     pub trait ToDataFrame {
45//!         fn to_dataframe(&self) -> PolarsResult<DataFrame>;
46//!         fn empty_dataframe() -> PolarsResult<DataFrame>;
47//!         fn schema() -> PolarsResult<Vec<(&'static str, DataType)>>;
48//!     }
49//!
50//!     pub trait Columnar: Sized {
51//!         fn columnar_to_dataframe(items: &[Self]) -> PolarsResult<DataFrame>;
52//!     }
53//! }
54//!
55//! #[derive(ToDataFrame)]
56//! #[df_derive(trait = "crate::dataframe::ToDataFrame")] // Columnar path auto-infers
57//! struct Trade { symbol: String, price: f64, size: u64 }
58//!
59//! fn main() -> polars::prelude::PolarsResult<()> {
60//!     let t = Trade { symbol: "AAPL".into(), price: 187.23, size: 100 };
61//!     let df_single = <Trade as crate::dataframe::ToDataFrame>::to_dataframe(&t)?;
62//!     println!("{}", df_single);
63//!     Ok(())
64//! }
65//! ```
66//!
67//! ## Features
68//!
69//! - **Nested structs (flattening)**: fields of nested structs appear as `outer.inner` columns
70//! - **Vec of primitives and structs**: becomes Polars `List` columns; `Vec<Nested>` becomes
71//!   multiple `outer.subfield` list columns
72//! - **`Option<T>`**: null-aware materialization for both scalars and lists
73//! - **Tuple structs**: supported; columns are named `field_0`, `field_1`, ...
74//! - **Empty structs**: produce `(1, 0)` for instances and `(0, 0)` for empty frames
//! - **Schema discovery**: `T::schema() -> PolarsResult<Vec<(&'static str, DataType)>>`
76//! - **Columnar batch conversion**: `[T]` via a generated `Columnar` implementation
77//!
78//! ### Attribute helpers
79//!
80//! Use `#[df_derive(as_string)]` to stringify values during conversion. This is particularly useful
81//! for enums:
82//!
83//! ```rust
84//! use df_derive::ToDataFrame;
85//!
86//! // Minimal runtime traits used by the derive macro
87//! mod dataframe {
88//!     use polars::prelude::{DataFrame, DataType, PolarsResult};
89//!     pub trait ToDataFrame {
90//!         fn to_dataframe(&self) -> PolarsResult<DataFrame>;
91//!         fn empty_dataframe() -> PolarsResult<DataFrame>;
92//!         fn schema() -> PolarsResult<Vec<(&'static str, DataType)>>;
93//!     }
94//!     pub trait Columnar: Sized {
95//!         fn columnar_to_dataframe(items: &[Self]) -> PolarsResult<DataFrame>;
96//!     }
97//! }
98//!
99//! #[derive(Clone, Debug, PartialEq)]
100//! enum Status { Active, Inactive }
101//!
102//! impl std::fmt::Display for Status {
103//!     fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
104//!         match self {
105//!             Status::Active => write!(f, "Active"),
106//!             Status::Inactive => write!(f, "Inactive"),
107//!         }
108//!     }
109//! }
110//!
111//! #[derive(ToDataFrame)]
112//! #[df_derive(trait = "crate::dataframe::ToDataFrame")]
113//! struct WithEnums {
114//!     #[df_derive(as_string)]
115//!     status: Status,
116//!     #[df_derive(as_string)]
117//!     opt_status: Option<Status>,
118//!     #[df_derive(as_string)]
119//!     statuses: Vec<Status>,
120//! }
121//!
122//! fn main() {}
123//! ```
124//!
125//! Columns will use `DataType::String` (or `List<String>` for `Vec<_>`), and values are produced via
126//! `ToString`.
127//!
128//! ## Supported types
129//!
130//! - **Primitives**: `String`, `bool`, integer types (`i8/i16/i32/i64/isize`, `u8/u16/u32/u64/usize`),
131//!   `f32`, `f64`
132//! - **Time**: `chrono::DateTime<Utc>` → materialized as `Datetime(Milliseconds, None)`
133//! - **Decimal**: `rust_decimal::Decimal` → `Decimal(38, 10)`
134//! - **Wrappers**: `Option<T>`, `Vec<T>` in any nesting order
135//! - **Custom structs**: any other struct deriving `ToDataFrame` (supports nesting and `Vec<Nested>`,
136//!   yielding prefixed list columns)
137//! - **Tuple structs**: unnamed fields are emitted as `field_{index}`
138//!
139//! ## Column naming
140//!
141//! - Named struct fields: `field_name`
142//! - Nested structs: `outer.inner` (recursively)
143//! - Vec of custom structs: `vec_field.subfield` (list dtype)
144//! - Tuple structs: `field_0`, `field_1`, ...
145//!
146//! ## Generated API
147//!
148//! For every `#[derive(ToDataFrame)]` type `T` the macro generates implementations of two traits
149//! (paths configurable via `#[df_derive(...)]`):
150//!
151//! - `ToDataFrame` for `T`:
152//!   - `fn to_dataframe(&self) -> PolarsResult<DataFrame>`
153//!   - `fn empty_dataframe() -> PolarsResult<DataFrame>`
154//!   - `fn schema() -> PolarsResult<Vec<(&'static str, DataType)>>`
155//! - `Columnar` for `T`:
156//!   - `fn columnar_to_dataframe(items: &[Self]) -> PolarsResult<DataFrame>`
157//!
158//! Empty-struct behavior:
159//!
160//! - `to_dataframe(&self)` produces a single-row `DataFrame` with zero columns
161//! - `empty_dataframe()` produces a `(0, 0)` `DataFrame`
162//! - `columnar_to_dataframe(&[T])` produces a zero-column `DataFrame` with `items.len()` rows
163//!
164//! ## Examples
165//!
166//! This crate includes several runnable examples in the `examples/` directory:
167//!
168//! - `quickstart` — Basic usage with single and batch `DataFrame` conversion
169//! - `nested` — Nested structs with dot notation column naming
170//! - `vec_custom` — Vec of custom structs creating List columns
171//! - `tuple` — Tuple structs with `field_0`, `field_1` naming
172//! - `datetime_decimal` — `DateTime` and `Decimal` type support
173//! - `as_string` — `#[df_derive(as_string)]` attribute for enum conversion
174//!
175//! ## Limitations and guidance
176//!
177//! - **Unsupported container types**: maps/sets like `HashMap<_, _>` are not supported
178//! - **Enums**: derive on enums is not supported; use `#[df_derive(as_string)]` on enum fields
179//! - **Generics**: generic structs are not supported by the derive (see `tests/fail`)
180//! - **All nested types must also derive**: if you nest a struct, it must also derive `ToDataFrame`
181//!
182//! ## Performance notes
183//!
184//! The derive implements an internal `Columnar` path used by runtimes to convert slices efficiently,
185//! avoiding per-row `DataFrame` builds. Criterion benches in `benches/` exercise wide, deep, and
186//! nested-Vec shapes (100k+ rows), demonstrating consistent performance across shapes.
187//!
188//! ## Compatibility
189//!
190//! - **Rust edition**: 2024
191//! - **Polars**: 0.50 (tested). Enable Polars features `timezones` and `dtype-decimal` if you use
192//!   `DateTime<Utc>` or `Decimal`.
193//!
194//! ## License
195//!
196//! MIT. See `LICENSE`.
197//!
198//! ## Crate path override (about paft)
199//!
200//! By default, the macro resolves trait paths to a `dataframe` module under the `paft` ecosystem.
201//! Concretely, it attempts to implement `paft::dataframe::ToDataFrame` and
202//! `paft::dataframe::Columnar` (or `paft-core::dataframe::...`) if those crates are present. You can
203//! override these paths for any runtime by annotating your type with `#[df_derive(...)]`:
204//!
205//! ```rust
206//! use df_derive::ToDataFrame;
207//!
208//! // Define a local runtime with the expected traits
209//! mod my_runtime { pub mod dataframe {
210//!     use polars::prelude::{DataFrame, DataType, PolarsResult};
211//!     pub trait ToDataFrame {
212//!         fn to_dataframe(&self) -> PolarsResult<DataFrame>;
213//!         fn empty_dataframe() -> PolarsResult<DataFrame>;
214//!         fn schema() -> PolarsResult<Vec<(&'static str, DataType)>>;
215//!     }
216//!     pub trait Columnar: Sized {
217//!         fn columnar_to_dataframe(items: &[Self]) -> PolarsResult<DataFrame>;
218//!     }
219//! }}
220//!
221//! #[derive(ToDataFrame)]
222//! #[df_derive(trait = "my_runtime::dataframe::ToDataFrame")] // Columnar inferred
223//! struct MyType {}
224//!
225//! fn main() {}
226//! ```
227//!
228//! If you need to override both explicitly:
229//!
230//! ```rust
231//! use df_derive::ToDataFrame;
232//!
233//! // Define a local runtime with the expected traits
234//! mod my_runtime { pub mod dataframe {
235//!     use polars::prelude::{DataFrame, DataType, PolarsResult};
236//!     pub trait ToDataFrame {
237//!         fn to_dataframe(&self) -> PolarsResult<DataFrame>;
238//!         fn empty_dataframe() -> PolarsResult<DataFrame>;
239//!         fn schema() -> PolarsResult<Vec<(&'static str, DataType)>>;
240//!     }
241//!     pub trait Columnar: Sized {
242//!         fn columnar_to_dataframe(items: &[Self]) -> PolarsResult<DataFrame>;
243//!     }
244//! }}
245//!
246//! #[derive(ToDataFrame)]
247//! #[df_derive(
248//!     trait = "my_runtime::dataframe::ToDataFrame",
249//!     columnar = "my_runtime::dataframe::Columnar",
250//! )]
251//! struct MyType {}
252//!
253//! fn main() {}
254//! ```
255#![warn(missing_docs)]
256extern crate proc_macro;
257
258mod codegen;
259mod ir;
260mod parser;
261mod type_analysis;
262use proc_macro::TokenStream;
263use quote::quote;
264use syn::{DeriveInput, parse_macro_input};
265
266/// Derive `ToDataFrame` for structs and tuple structs to generate fast conversions to Polars.
267///
268/// What this macro generates (paths configurable via `#[df_derive(...)]`):
269///
270/// - An implementation of `ToDataFrame` for the annotated type `T` providing:
271///   - `fn to_dataframe(&self) -> PolarsResult<DataFrame>`
272///   - `fn empty_dataframe() -> PolarsResult<DataFrame>`
273///   - `fn schema() -> PolarsResult<Vec<(&'static str, DataType)>>`
274/// - An implementation of `Columnar` for `T` providing
275///   `fn columnar_to_dataframe(items: &[Self]) -> PolarsResult<DataFrame>`
276///
277/// Supported shapes and types:
278///
279/// - Named and tuple structs (tuple fields are named `field_{index}`)
280/// - Nested structs are flattened using dot notation (e.g., `outer.inner`)
281/// - Wrappers `Option<T>` and `Vec<T>` in any nesting order, with `Vec<Struct>` producing multiple
282///   list columns with a `vec_field.subfield` prefix
283/// - Primitive types: `String`, `bool`, integer types, `f32`, `f64`
284/// - `chrono::DateTime<Utc>` (materialized as `Datetime(Milliseconds, None)`)
285/// - `rust_decimal::Decimal` (materialized as `Decimal(38, 10)`)
286///
287/// Attributes:
288///
289/// - Container-level: `#[df_derive(trait = "path::ToDataFrame")]` to set the `ToDataFrame` trait
290///   path; the `Columnar` path is inferred by replacing the last path segment with `Columnar`.
291///   Optionally, set both explicitly with
292///   `#[df_derive(columnar = "path::Columnar")]`.
293/// - Field-level: `#[df_derive(as_string)]` to stringify values (e.g., enums) during conversion,
294///   resulting in `DataType::String` or `List<String>`.
295///
296/// Notes:
297///
298/// - Enums and generic structs are not supported for derive.
299/// - All nested custom structs must also derive `ToDataFrame`.
300/// - Empty structs: `to_dataframe` yields a single-row, zero-column `DataFrame`; the columnar path
301///   yields a zero-column `DataFrame` with `items.len()` rows.
302#[proc_macro_derive(ToDataFrame, attributes(df_derive))]
303pub fn to_dataframe_derive(input: TokenStream) -> TokenStream {
304    // Parse the input tokens into a syntax tree
305    let ast = parse_macro_input!(input as DeriveInput);
306    // Parse helper attribute configuration (trait paths)
307    let default_df_mod = codegen::resolve_paft_crate_path();
308    let mut to_df_trait_path_ts = quote! { #default_df_mod::ToDataFrame };
309    let mut columnar_trait_path_ts = quote! { #default_df_mod::Columnar };
310
311    for attr in &ast.attrs {
312        if attr.path().is_ident("df_derive") {
313            let parse_res = attr.parse_nested_meta(|meta| {
314                if meta.path.is_ident("trait") {
315                    let lit: syn::LitStr = meta.value()?.parse()?;
316                    let path: syn::Path = syn::parse_str(&lit.value())
317                        .map_err(|e| meta.error(format!("invalid trait path: {e}")))?;
318                    to_df_trait_path_ts = quote! { #path };
319
320                    // Automatically infer the Columnar trait path by replacing the final segment
321                    let mut columnar_path = path;
322                    if let Some(last_segment) = columnar_path.segments.last_mut() {
323                        last_segment.ident = syn::Ident::new("Columnar", last_segment.ident.span());
324                    }
325                    columnar_trait_path_ts = quote! { #columnar_path };
326                    Ok(())
327                } else if meta.path.is_ident("columnar") {
328                    let lit: syn::LitStr = meta.value()?.parse()?;
329                    let path: syn::Path = syn::parse_str(&lit.value())
330                        .map_err(|e| meta.error(format!("invalid columnar trait path: {e}")))?;
331                    columnar_trait_path_ts = quote! { #path };
332                    Ok(())
333                } else {
334                    Err(meta.error("unsupported key in #[df_derive(...)] attribute"))
335                }
336            });
337            if let Err(err) = parse_res {
338                return err.to_compile_error().into();
339            }
340        }
341    }
342    let config = codegen::MacroConfig {
343        to_dataframe_trait_path: to_df_trait_path_ts,
344        columnar_trait_path: columnar_trait_path_ts,
345    };
346    // Build the intermediate representation
347    let ir = match parser::parse_to_ir(&ast) {
348        Ok(ir) => ir,
349        Err(e) => return e.to_compile_error().into(),
350    };
351
352    // Delegate to the codegen orchestrator
353    let generated = codegen::generate_code(&ir, &config);
354    TokenStream::from(generated)
355}