polars_testing/asserts/
utils.rs

1use std::ops::Not;
2
3use polars_core::datatypes::unpack_dtypes;
4use polars_core::prelude::*;
5use polars_ops::series::abs;
6
/// Options that steer how two `Series` are compared for equality.
///
/// Every flag switches one aspect of the comparison on or off. The two
/// tolerances (`rtol`, `atol`) describe how far floating point values may
/// drift apart when an approximate comparison is requested
/// (`check_exact == false`).
///
/// The type derives `Debug`, `Clone` and `PartialEq` so a configuration can be
/// printed in diagnostics, reused for several assertions and compared in tests.
#[derive(Debug, Clone, PartialEq)]
pub struct SeriesEqualOptions {
    /// Whether to check that the data types match.
    pub check_dtypes: bool,
    /// Whether to check that the Series names match.
    pub check_names: bool,
    /// Whether to check that elements appear in the same order.
    pub check_order: bool,
    /// Whether to check for exact equality (true) or approximate equality (false) for floating point values.
    pub check_exact: bool,
    /// Relative tolerance for approximate equality of floating point values.
    pub rtol: f64,
    /// Absolute tolerance for approximate equality of floating point values.
    pub atol: f64,
    /// Whether to compare categorical values as strings.
    pub categorical_as_str: bool,
}
27
28impl Default for SeriesEqualOptions {
29    /// Creates a new `SeriesEqualOptions` with default settings.
30    ///
31    /// Default configuration:
32    /// - Checks data types, names, and order
33    /// - Uses exact equality comparisons
34    /// - Sets relative tolerance to 1e-5 and absolute tolerance to 1e-8 for floating point comparisons
35    /// - Does not convert categorical values to strings for comparison
36    fn default() -> Self {
37        Self {
38            check_dtypes: true,
39            check_names: true,
40            check_order: true,
41            check_exact: true,
42            rtol: 1e-5,
43            atol: 1e-8,
44            categorical_as_str: false,
45        }
46    }
47}
48
49impl SeriesEqualOptions {
50    /// Creates a new `SeriesEqualOptions` with default settings.
51    pub fn new() -> Self {
52        Self::default()
53    }
54
55    /// Sets whether to check that data types match.
56    pub fn with_check_dtypes(mut self, value: bool) -> Self {
57        self.check_dtypes = value;
58        self
59    }
60
61    /// Sets whether to check that Series names match.
62    pub fn with_check_names(mut self, value: bool) -> Self {
63        self.check_names = value;
64        self
65    }
66
67    /// Sets whether to check that elements appear in the same order.
68    pub fn with_check_order(mut self, value: bool) -> Self {
69        self.check_order = value;
70        self
71    }
72
73    /// Sets whether to check for exact equality (true) or approximate equality (false) for floating point values.
74    pub fn with_check_exact(mut self, value: bool) -> Self {
75        self.check_exact = value;
76        self
77    }
78
79    /// Sets the relative tolerance for approximate equality of floating point values.
80    pub fn with_rtol(mut self, value: f64) -> Self {
81        self.rtol = value;
82        self
83    }
84
85    /// Sets the absolute tolerance for approximate equality of floating point values.
86    pub fn with_atol(mut self, value: f64) -> Self {
87        self.atol = value;
88        self
89    }
90
91    /// Sets whether to compare categorical values as strings.
92    pub fn with_categorical_as_str(mut self, value: bool) -> Self {
93        self.categorical_as_str = value;
94        self
95    }
96}
97
98/// Change a (possibly nested) Categorical data type to a String data type.
99pub fn categorical_dtype_to_string_dtype(dtype: &DataType) -> DataType {
100    match dtype {
101        DataType::Categorical(..) => DataType::String,
102        DataType::List(inner) => {
103            let inner_cast = categorical_dtype_to_string_dtype(inner);
104            DataType::List(Box::new(inner_cast))
105        },
106        DataType::Array(inner, size) => {
107            let inner_cast = categorical_dtype_to_string_dtype(inner);
108            DataType::Array(Box::new(inner_cast), *size)
109        },
110        DataType::Struct(fields) => {
111            let transformed_fields = fields
112                .iter()
113                .map(|field| {
114                    Field::new(
115                        field.name().clone(),
116                        categorical_dtype_to_string_dtype(field.dtype()),
117                    )
118                })
119                .collect::<Vec<Field>>();
120
121            DataType::Struct(transformed_fields)
122        },
123        _ => dtype.clone(),
124    }
125}
126
127/// Cast a (possibly nested) Categorical Series to a String Series.
128pub fn categorical_series_to_string(s: &Series) -> Series {
129    let dtype = s.dtype();
130    let noncat_dtype = categorical_dtype_to_string_dtype(dtype);
131
132    if *dtype != noncat_dtype {
133        s.cast(&noncat_dtype).unwrap()
134    } else {
135        s.clone()
136    }
137}
138
139/// Returns true if both DataTypes are floating point types.
140pub fn comparing_floats(left: &DataType, right: &DataType) -> bool {
141    left.is_float() && right.is_float()
142}
143
144/// Returns true if both DataTypes are list-like (either List or Array types).
145pub fn comparing_lists(left: &DataType, right: &DataType) -> bool {
146    matches!(left, DataType::List(_) | DataType::Array(_, _))
147        && matches!(right, DataType::List(_) | DataType::Array(_, _))
148}
149
150/// Returns true if both DataTypes are struct types.
151pub fn comparing_structs(left: &DataType, right: &DataType) -> bool {
152    left.is_struct() && right.is_struct()
153}
154
155/// Returns true if both DataTypes are nested types (lists or structs) that contain floating point types within them.
156/// First checks if both types are either lists or structs, then unpacks their nested DataTypes to determine if
157/// at least one floating point type exists in each of the nested structures.
158pub fn comparing_nested_floats(left: &DataType, right: &DataType) -> bool {
159    if !comparing_lists(left, right) && !comparing_structs(left, right) {
160        return false;
161    }
162
163    let left_dtypes = unpack_dtypes(left, false);
164    let right_dtypes = unpack_dtypes(right, false);
165
166    let left_has_floats = left_dtypes.iter().any(|dt| dt.is_float());
167    let right_has_floats = right_dtypes.iter().any(|dt| dt.is_float());
168
169    left_has_floats && right_has_floats
170}
171
172/// Ensures that null values in two Series match exactly and returns an error if any mismatches are found.
173pub fn assert_series_null_values_match(left: &Series, right: &Series) -> PolarsResult<()> {
174    let null_value_mismatch = left.is_null().not_equal(&right.is_null());
175
176    if null_value_mismatch.any() {
177        return Err(polars_err!(
178            assertion_error = "Series",
179            "null value mismatch",
180            left.null_count(),
181            right.null_count()
182        ));
183    }
184
185    Ok(())
186}
187
188/// Validates that NaN patterns are identical between two float Series, returning error if any mismatches are found.
189pub fn assert_series_nan_values_match(left: &Series, right: &Series) -> PolarsResult<()> {
190    if !comparing_floats(left.dtype(), right.dtype()) {
191        return Ok(());
192    }
193    let left_nan = left.is_nan()?;
194    let right_nan = right.is_nan()?;
195
196    let nan_value_mismatch = left_nan.not_equal(&right_nan);
197
198    let left_nan_count = left_nan.sum().unwrap_or(0);
199    let right_nan_count = right_nan.sum().unwrap_or(0);
200
201    if nan_value_mismatch.any() {
202        return Err(polars_err!(
203            assertion_error = "Series",
204            "nan value mismatch",
205            left_nan_count,
206            right_nan_count
207        ));
208    }
209
210    Ok(())
211}
212
213/// Verifies that two Series have values within a specified tolerance.
214///
215/// This function checks if the values in `left` and `right` Series that are marked as unequal
216/// in the `unequal` boolean array are within the specified relative and absolute tolerances.
217///
218/// # Arguments
219///
220/// * `left` - The first Series to compare
221/// * `right` - The second Series to compare
222/// * `unequal` - Boolean ChunkedArray indicating which elements to check (true = check this element)
223/// * `rtol` - Relative tolerance (multiplied by the absolute value of the right Series)
224/// * `atol` - Absolute tolerance added to the relative tolerance
225///
226/// # Returns
227///
228/// * `Ok(())` if all values are within tolerance
229/// * `Err` with details about problematic values if any values exceed the tolerance
230///
231/// # Formula
232///
233/// Values are considered within tolerance if:
234/// `|left - right| <= (rtol * |right| + atol)` OR values are exactly equal
235///
236pub fn assert_series_values_within_tolerance(
237    left: &Series,
238    right: &Series,
239    unequal: &ChunkedArray<BooleanType>,
240    rtol: f64,
241    atol: f64,
242) -> PolarsResult<()> {
243    let left_unequal = left.filter(unequal)?;
244    let right_unequal = right.filter(unequal)?;
245
246    let difference = (&left_unequal - &right_unequal)?;
247    let abs_difference = abs(&difference)?;
248
249    let right_abs = abs(&right_unequal)?;
250
251    let rtol_series = Series::new("rtol".into(), &[rtol]);
252    let atol_series = Series::new("atol".into(), &[atol]);
253
254    let rtol_part = (&right_abs * &rtol_series)?;
255    let tolerance = (&rtol_part + &atol_series)?;
256
257    let finite_mask = right_unequal.is_finite()?;
258    let diff_within_tol = abs_difference.lt_eq(&tolerance)?;
259    let equal_values = left_unequal.equal(&right_unequal)?;
260
261    let within_tolerance = (diff_within_tol & finite_mask) | equal_values;
262
263    if within_tolerance.all() {
264        Ok(())
265    } else {
266        let exceeded_indices = within_tolerance.not();
267        let problematic_left = left_unequal.filter(&exceeded_indices)?;
268        let problematic_right = right_unequal.filter(&exceeded_indices)?;
269
270        Err(polars_err!(
271            assertion_error = "Series",
272            "values not within tolerance",
273            problematic_left,
274            problematic_right
275        ))
276    }
277}
278
279/// Compares two Series for equality with configurable options for ordering, exact matching, and tolerance.
280///
281/// This function verifies that the values in `left` and `right` Series are equal according to
282/// the specified comparison criteria. It handles different types including floats and nested types
283/// with appropriate equality checks.
284///
285/// # Arguments
286///
287/// * `left` - The first Series to compare
288/// * `right` - The second Series to compare
289/// * `check_order` - If true, elements must be in the same order; if false, Series will be sorted before comparison
290/// * `check_exact` - If true, requires exact equality; if false, allows approximate equality for floats within tolerance
291/// * `rtol` - Relative tolerance for float comparison (used when `check_exact` is false)
292/// * `atol` - Absolute tolerance for float comparison (used when `check_exact` is false)
293/// * `categorical_as_str` - If true, converts categorical Series to strings before comparison
294///
295/// # Returns
296///
297/// * `Ok(())` if Series match according to specified criteria
298/// * `Err` with details about mismatches if Series differ
299///
300/// # Behavior
301///
302/// 1. Handles categorical Series based on `categorical_as_str` flag
303/// 2. Sorts Series if `check_order` is false
304/// 3. For nested float types, delegates to `assert_series_nested_values_equal`
305/// 4. For non-float types or when `check_exact` is true, requires exact match
306/// 5. For float types with approximate matching:
307///    - Verifies null values match using `assert_series_null_values_match`
308///    - Verifies NaN values match using `assert_series_nan_values_match`
309///    - Verifies float values are within tolerance using `assert_series_values_within_tolerance`
310///
311pub fn assert_series_values_equal(
312    left: &Series,
313    right: &Series,
314    check_order: bool,
315    check_exact: bool,
316    rtol: f64,
317    atol: f64,
318    categorical_as_str: bool,
319) -> PolarsResult<()> {
320    let (left, right) = if categorical_as_str {
321        (
322            categorical_series_to_string(left),
323            categorical_series_to_string(right),
324        )
325    } else {
326        (left.clone(), right.clone())
327    };
328
329    let (left, right) = if !check_order {
330        (
331            left.sort(SortOptions::default())?,
332            right.sort(SortOptions::default())?,
333        )
334    } else {
335        (left.clone(), right.clone())
336    };
337
338    let unequal = match left.not_equal_missing(&right) {
339        Ok(result) => result,
340        Err(_) => {
341            return Err(polars_err!(
342                assertion_error = "Series",
343                "incompatible data types",
344                left.dtype(),
345                right.dtype()
346            ));
347        },
348    };
349
350    if comparing_nested_floats(left.dtype(), right.dtype()) {
351        let filtered_left = left.filter(&unequal)?;
352        let filtered_right = right.filter(&unequal)?;
353
354        match assert_series_nested_values_equal(
355            &filtered_left,
356            &filtered_right,
357            check_exact,
358            rtol,
359            atol,
360            categorical_as_str,
361        ) {
362            Ok(_) => return Ok(()),
363            Err(_) => {
364                return Err(polars_err!(
365                    assertion_error = "Series",
366                    "nested value mismatch",
367                    left,
368                    right
369                ));
370            },
371        }
372    }
373
374    if !unequal.any() {
375        return Ok(());
376    }
377
378    if check_exact || !left.dtype().is_float() || !right.dtype().is_float() {
379        return Err(polars_err!(
380            assertion_error = "Series",
381            "exact value mismatch",
382            left,
383            right
384        ));
385    }
386
387    assert_series_null_values_match(&left, &right)?;
388    assert_series_nan_values_match(&left, &right)?;
389    assert_series_values_within_tolerance(&left, &right, &unequal, rtol, atol)?;
390
391    Ok(())
392}
393
394/// Recursively compares nested Series structures (lists or structs) for equality.
395///
396/// This function handles the comparison of complex nested data structures by recursively
397/// applying appropriate equality checks based on the nested data type.
398///
399/// # Arguments
400///
401/// * `left` - The first nested Series to compare
402/// * `right` - The second nested Series to compare
403/// * `check_exact` - If true, requires exact equality; if false, allows approximate equality for floats
404/// * `rtol` - Relative tolerance for float comparison (used when `check_exact` is false)
405/// * `atol` - Absolute tolerance for float comparison (used when `check_exact` is false)
406/// * `categorical_as_str` - If true, converts categorical Series to strings before comparison
407///
408/// # Returns
409///
410/// * `Ok(())` if nested Series match according to specified criteria
411/// * `Err` with details about mismatches if Series differ
412///
413/// # Behavior
414///
415/// For List types:
416/// 1. Iterates through corresponding elements in both Series
417/// 2. Returns error if null values are encountered
418/// 3. Creates single-element Series for each value and explodes them
419/// 4. Recursively calls `assert_series_values_equal` on the exploded Series
420///
421/// For Struct types:
422/// 1. Unnests both struct Series to access their columns
423/// 2. Iterates through corresponding columns
424/// 3. Recursively calls `assert_series_values_equal` on each column pair
425///
426pub fn assert_series_nested_values_equal(
427    left: &Series,
428    right: &Series,
429    check_exact: bool,
430    rtol: f64,
431    atol: f64,
432    categorical_as_str: bool,
433) -> PolarsResult<()> {
434    if comparing_lists(left.dtype(), right.dtype()) {
435        let left_rechunked = left.rechunk();
436        let right_rechunked = right.rechunk();
437
438        let zipped = left_rechunked.iter().zip(right_rechunked.iter());
439
440        for (s1, s2) in zipped {
441            if s1.is_null() || s2.is_null() {
442                return Err(polars_err!(
443                    assertion_error = "Series",
444                    "nested value mismatch",
445                    s1,
446                    s2
447                ));
448            } else {
449                let s1_series = Series::new("".into(), std::slice::from_ref(&s1));
450                let s2_series = Series::new("".into(), std::slice::from_ref(&s2));
451
452                match assert_series_values_equal(
453                    &s1_series.explode(false)?,
454                    &s2_series.explode(false)?,
455                    true,
456                    check_exact,
457                    rtol,
458                    atol,
459                    categorical_as_str,
460                ) {
461                    Ok(_) => continue,
462                    Err(e) => return Err(e),
463                }
464            }
465        }
466    } else {
467        let ls = left.struct_()?.clone().unnest();
468        let rs = right.struct_()?.clone().unnest();
469
470        for col_name in ls.get_column_names() {
471            let s1_column = ls.column(col_name)?;
472            let s2_column = rs.column(col_name)?;
473
474            let s1_series = s1_column.as_materialized_series();
475            let s2_series = s2_column.as_materialized_series();
476
477            match assert_series_values_equal(
478                s1_series,
479                s2_series,
480                true,
481                check_exact,
482                rtol,
483                atol,
484                categorical_as_str,
485            ) {
486                Ok(_) => continue,
487                Err(e) => return Err(e),
488            }
489        }
490    }
491
492    Ok(())
493}
494
495/// Verifies that two Series are equal according to a set of configurable criteria.
496///
497/// This function serves as the main entry point for comparing Series, checking various
498/// metadata properties before comparing the actual values.
499///
500/// # Arguments
501///
502/// * `left` - The first Series to compare
503/// * `right` - The second Series to compare
504/// * `options` - A `SeriesEqualOptions` struct containing configuration parameters:
505///   * `check_names` - If true, verifies Series names match
506///   * `check_dtypes` - If true, verifies data types match
507///   * `check_order` - If true, elements must be in the same order
508///   * `check_exact` - If true, requires exact equality for float values
509///   * `rtol` - Relative tolerance for float comparison
510///   * `atol` - Absolute tolerance for float comparison
511///   * `categorical_as_str` - If true, converts categorical Series to strings before comparison
512///
513/// # Returns
514///
515/// * `Ok(())` if Series match according to all specified criteria
516/// * `Err` with details about the first mismatch encountered:
517///   * Length mismatch
518///   * Name mismatch (if checking names)
519///   * Data type mismatch (if checking dtypes)
520///   * Value mismatches (via `assert_series_values_equal`)
521///
522/// # Order of Checks
523///
524/// 1. Series length
525/// 2. Series names (if `check_names` is true)
526/// 3. Data types (if `check_dtypes` is true)
527/// 4. Series values (delegated to `assert_series_values_equal`)
528///
529pub fn assert_series_equal(
530    left: &Series,
531    right: &Series,
532    options: SeriesEqualOptions,
533) -> PolarsResult<()> {
534    if left.len() != right.len() {
535        return Err(polars_err!(
536            assertion_error = "Series",
537            "length mismatch",
538            left.len(),
539            right.len()
540        ));
541    }
542
543    if options.check_names && left.name() != right.name() {
544        return Err(polars_err!(
545            assertion_error = "Series",
546            "name mismatch",
547            left.name(),
548            right.name()
549        ));
550    }
551
552    if options.check_dtypes && left.dtype() != right.dtype() {
553        return Err(polars_err!(
554            assertion_error = "Series",
555            "dtype mismatch",
556            left.dtype(),
557            right.dtype()
558        ));
559    }
560
561    assert_series_values_equal(
562        left,
563        right,
564        options.check_order,
565        options.check_exact,
566        options.rtol,
567        options.atol,
568        options.categorical_as_str,
569    )
570}
571
/// Options that steer how two `DataFrame`s are compared for equality.
///
/// Every flag switches one aspect of the comparison on or off. The two
/// tolerances (`rtol`, `atol`) describe how far floating point values may
/// drift apart when an approximate comparison is requested
/// (`check_exact == false`).
///
/// The type derives `Debug`, `Clone` and `PartialEq` so a configuration can be
/// printed in diagnostics, reused for several assertions and compared in tests.
#[derive(Debug, Clone, PartialEq)]
pub struct DataFrameEqualOptions {
    /// Whether to check that rows appear in the same order.
    pub check_row_order: bool,
    /// Whether to check that columns appear in the same order.
    pub check_column_order: bool,
    /// Whether to check that the data types match for corresponding columns.
    pub check_dtypes: bool,
    /// Whether to check for exact equality (true) or approximate equality (false) for floating point values.
    pub check_exact: bool,
    /// Relative tolerance for approximate equality of floating point values.
    pub rtol: f64,
    /// Absolute tolerance for approximate equality of floating point values.
    pub atol: f64,
    /// Whether to compare categorical values as strings.
    pub categorical_as_str: bool,
}
592
593impl Default for DataFrameEqualOptions {
594    /// Creates a new `DataFrameEqualOptions` with default settings.
595    ///
596    /// Default configuration:
597    /// - Checks row order, column order, and data types
598    /// - Uses approximate equality comparisons for floating point values
599    /// - Sets relative tolerance to 1e-5 and absolute tolerance to 1e-8 for floating point comparisons
600    /// - Does not convert categorical values to strings for comparison
601    fn default() -> Self {
602        Self {
603            check_row_order: true,
604            check_column_order: true,
605            check_dtypes: true,
606            check_exact: false,
607            rtol: 1e-5,
608            atol: 1e-8,
609            categorical_as_str: false,
610        }
611    }
612}
613
614impl DataFrameEqualOptions {
615    /// Creates a new `DataFrameEqualOptions` with default settings.
616    pub fn new() -> Self {
617        Self::default()
618    }
619
620    /// Sets whether to check that rows appear in the same order.
621    pub fn with_check_row_order(mut self, value: bool) -> Self {
622        self.check_row_order = value;
623        self
624    }
625
626    /// Sets whether to check that columns appear in the same order.
627    pub fn with_check_column_order(mut self, value: bool) -> Self {
628        self.check_column_order = value;
629        self
630    }
631
632    /// Sets whether to check that data types match for corresponding columns.
633    pub fn with_check_dtypes(mut self, value: bool) -> Self {
634        self.check_dtypes = value;
635        self
636    }
637
638    /// Sets whether to check for exact equality (true) or approximate equality (false) for floating point values.
639    pub fn with_check_exact(mut self, value: bool) -> Self {
640        self.check_exact = value;
641        self
642    }
643
644    /// Sets the relative tolerance for approximate equality of floating point values.
645    pub fn with_rtol(mut self, value: f64) -> Self {
646        self.rtol = value;
647        self
648    }
649
650    /// Sets the absolute tolerance for approximate equality of floating point values.
651    pub fn with_atol(mut self, value: f64) -> Self {
652        self.atol = value;
653        self
654    }
655
656    /// Sets whether to compare categorical values as strings.
657    pub fn with_categorical_as_str(mut self, value: bool) -> Self {
658        self.categorical_as_str = value;
659        self
660    }
661}
662
663/// Compares DataFrame schemas for equality based on specified criteria.
664///
665/// This function validates that two DataFrames have compatible schemas by checking
666/// column names, their order, and optionally their data types according to the
667/// provided configuration parameters.
668///
669/// # Arguments
670///
671/// * `left` - The first DataFrame to compare
672/// * `right` - The second DataFrame to compare
673/// * `check_dtypes` - If true, requires data types to match for corresponding columns
674/// * `check_column_order` - If true, requires columns to appear in the same order
675///
676/// # Returns
677///
678/// * `Ok(())` if DataFrame schemas match according to specified criteria
679/// * `Err` with details about schema mismatches if DataFrames differ
680///
681/// # Behavior
682///
683/// The function performs schema validation in the following order:
684///
685/// 1. **Fast path**: Returns immediately if schemas are identical
686/// 2. **Column name validation**: Ensures both DataFrames have the same set of column names
687///    - Reports columns present in left but missing in right
688///    - Reports columns present in right but missing in left
689/// 3. **Column order validation**: If `check_column_order` is true, verifies columns appear in the same sequence
690/// 4. **Data type validation**: If `check_dtypes` is true, ensures corresponding columns have matching data types
691///    - When `check_column_order` is false, compares data type sets for equality
692///    - When `check_column_order` is true, performs more precise type checking
693///
694pub fn assert_dataframe_schema_equal(
695    left: &DataFrame,
696    right: &DataFrame,
697    check_dtypes: bool,
698    check_column_order: bool,
699) -> PolarsResult<()> {
700    let left_schema = left.schema();
701    let right_schema = right.schema();
702
703    let ordered_left_cols = left.get_column_names();
704    let ordered_right_cols = right.get_column_names();
705
706    let left_set: PlHashSet<&PlSmallStr> = ordered_left_cols.iter().copied().collect();
707    let right_set: PlHashSet<&PlSmallStr> = ordered_right_cols.iter().copied().collect();
708
709    let left_dtypes: PlHashSet<DataType> = left.dtypes().into_iter().collect();
710    let right_dtypes: PlHashSet<DataType> = right.dtypes().into_iter().collect();
711
712    // Fast path for equal DataFrames
713    if left_schema == right_schema {
714        return Ok(());
715    }
716
717    if left_set != right_set {
718        let left_not_right: Vec<_> = left_set
719            .iter()
720            .filter(|col| !right_set.contains(*col))
721            .collect();
722
723        if !left_not_right.is_empty() {
724            return Err(polars_err!(
725                assertion_error = "DataFrame",
726                format!(
727                    "columns mismatch: {:?} in left, but not in right",
728                    left_not_right
729                ),
730                format!("{:?}", left_set),
731                format!("{:?}", right_set)
732            ));
733        } else {
734            let right_not_left: Vec<_> = right_set
735                .iter()
736                .filter(|col| !left_set.contains(*col))
737                .collect();
738
739            return Err(polars_err!(
740                assertion_error = "DataFrame",
741                format!(
742                    "columns mismatch: {:?} in right, but not in left",
743                    right_not_left
744                ),
745                format!("{:?}", left_set),
746                format!("{:?}", right_set)
747            ));
748        }
749    }
750
751    if check_column_order && ordered_left_cols != ordered_right_cols {
752        return Err(polars_err!(
753            assertion_error = "DataFrame",
754            "columns are not in the same order",
755            format!("{:?}", ordered_left_cols),
756            format!("{:?}", ordered_right_cols)
757        ));
758    }
759
760    if check_dtypes && (check_column_order || left_dtypes != right_dtypes) {
761        return Err(polars_err!(
762            assertion_error = "DataFrame",
763            "data types do not match",
764            format!("{:?}", left_dtypes),
765            format!("{:?}", right_dtypes)
766        ));
767    }
768
769    Ok(())
770}
771
772/// Verifies that two DataFrames are equal according to a set of configurable criteria.
773///
774/// This function serves as the main entry point for comparing DataFrames, first validating
775/// schema compatibility and then comparing the actual data values column by column.
776///
777/// # Arguments
778///
779/// * `left` - The first DataFrame to compare
780/// * `right` - The second DataFrame to compare
781/// * `options` - A `DataFrameEqualOptions` struct containing configuration parameters:
782///   * `check_row_order` - If true, rows must be in the same order
783///   * `check_column_order` - If true, columns must be in the same order
784///   * `check_dtypes` - If true, verifies data types match for corresponding columns
785///   * `check_exact` - If true, requires exact equality for float values
786///   * `rtol` - Relative tolerance for float comparison
787///   * `atol` - Absolute tolerance for float comparison
788///   * `categorical_as_str` - If true, converts categorical values to strings before comparison
789///
790/// # Returns
791///
792/// * `Ok(())` if DataFrames match according to all specified criteria
793/// * `Err` with details about the first mismatch encountered:
794///   * Schema mismatches (column names, order, or data types)
795///   * Height (row count) mismatch
796///   * Value mismatches in specific columns
797///
798/// # Order of Checks
799///
800/// 1. Schema validation (column names, order, and data types via `assert_dataframe_schema_equal`)
801/// 2. DataFrame height (row count)
802/// 3. Row ordering (sorts both DataFrames if `check_row_order` is false)
803/// 4. Column-by-column value comparison (delegated to `assert_series_values_equal`)
804///
805/// # Behavior
806///
807/// When `check_row_order` is false, both DataFrames are sorted using all columns to ensure
808/// consistent ordering before value comparison. This allows for row-order-independent equality
809/// checking while maintaining deterministic results.
810///
811pub fn assert_dataframe_equal(
812    left: &DataFrame,
813    right: &DataFrame,
814    options: DataFrameEqualOptions,
815) -> PolarsResult<()> {
816    assert_dataframe_schema_equal(
817        left,
818        right,
819        options.check_dtypes,
820        options.check_column_order,
821    )?;
822
823    if left.height() != right.height() {
824        return Err(polars_err!(
825            assertion_error = "DataFrames",
826            "height (row count) mismatch",
827            left.height(),
828            right.height()
829        ));
830    }
831
832    let left_cols = left.get_column_names_owned();
833
834    let (left, right) = if !options.check_row_order {
835        (
836            left.sort(left_cols.clone(), SortMultipleOptions::default())?,
837            right.sort(left_cols.clone(), SortMultipleOptions::default())?,
838        )
839    } else {
840        (left.clone(), right.clone())
841    };
842
843    for col in left_cols.iter() {
844        let s_left = left.column(col)?;
845        let s_right = right.column(col)?;
846
847        let s_left_series = s_left.as_materialized_series();
848        let s_right_series = s_right.as_materialized_series();
849
850        match assert_series_values_equal(
851            s_left_series,
852            s_right_series,
853            true,
854            options.check_exact,
855            options.rtol,
856            options.atol,
857            options.categorical_as_str,
858        ) {
859            Ok(_) => {},
860            Err(err) => {
861                return Err(polars_err!(
862                    assertion_error = "DataFrame",
863                    format!("value mismatch for column {:?}:, {}", col, err),
864                    format!("{:?}", s_left_series),
865                    format!("{:?}", s_right_series)
866                ));
867            },
868        }
869    }
870
871    Ok(())
872}