ruchy 4.1.1

A systems scripting language that transpiles to idiomatic Rust with extreme quality engineering
Documentation
#![allow(missing_docs)]
#![allow(clippy::cast_possible_wrap)]
//! DF-004: `DataFrame` `sort_by()` Property Tests (EXTREME TDD Validation)
//!
//! **CRITICAL**: Property tests with 10,000+ iterations to prove `sort_by()` correctness.
//!
//! **Note**: `sort_by()` already implemented, adding comprehensive test coverage retroactively.

use proptest::prelude::*;
use ruchy::runtime::eval_dataframe_ops::eval_dataframe_method;
use ruchy::runtime::{DataFrameColumn, Value};

/// Generate `DataFrame` with sortable numeric column
fn arb_numeric_dataframe(num_rows: usize) -> BoxedStrategy<Vec<DataFrameColumn>> {
    prop::collection::vec(any::<i64>(), num_rows..=num_rows)
        .prop_map(move |values| {
            let row_count = values.len();
            vec![
                DataFrameColumn {
                    name: "sort_col".to_string(),
                    values: values.into_iter().map(Value::Integer).collect(),
                },
                DataFrameColumn {
                    name: "id".to_string(),
                    values: (0..row_count as i64).map(Value::Integer).collect(),
                },
            ]
        })
        .boxed()
}

proptest! {
    #![proptest_config(ProptestConfig::with_cases(10000))]

    /// Property 1: sort_by() preserves row count
    /// Invariant: sorted_rows == original_rows
    #[test]
    fn prop_sort_preserves_row_count(
        columns in arb_numeric_dataframe(20)
    ) {
        let original_row_count = columns[0].values.len();

        let args = vec![Value::from_string("sort_col".to_string())];
        let result = eval_dataframe_method(&columns, "sort_by", &args);

        prop_assert!(result.is_ok());
        if let Ok(Value::DataFrame { columns: sorted_cols }) = result {
            prop_assert_eq!(
                sorted_cols[0].values.len(),
                original_row_count,
                "Sort must preserve row count"
            );
        }
    }

    /// Property 2: Sorted DataFrame is actually sorted (ascending)
    /// Invariant: ∀i, sorted[i] ≤ sorted[i+1]
    #[test]
    fn prop_sort_is_actually_sorted_ascending(
        values in prop::collection::vec(any::<i64>(), 1..50)
    ) {
        let columns = vec![DataFrameColumn {
            name: "nums".to_string(),
            values: values.into_iter().map(Value::Integer).collect(),
        }];

        let args = vec![Value::from_string("nums".to_string())];
        let result = eval_dataframe_method(&columns, "sort_by", &args);

        prop_assert!(result.is_ok());
        if let Ok(Value::DataFrame { columns: sorted_cols }) = result {
            // Check sorted order: each element <= next element
            for i in 0..sorted_cols[0].values.len().saturating_sub(1) {
                if let (Value::Integer(a), Value::Integer(b)) =
                    (&sorted_cols[0].values[i], &sorted_cols[0].values[i + 1]) {
                    prop_assert!(*a <= *b, "Values not in ascending order: {} > {}", a, b);
                }
            }
        }
    }

    /// Property 3: Descending sort is reverse of ascending
    /// Invariant: sort(asc) == reverse(sort(desc))
    #[test]
    fn prop_sort_descending_is_reverse(
        values in prop::collection::vec(any::<i64>(), 1..30)
    ) {
        let columns = vec![DataFrameColumn {
            name: "nums".to_string(),
            values: values.into_iter().map(Value::Integer).collect(),
        }];

        // Sort ascending
        let args_asc = vec![Value::from_string("nums".to_string()), Value::Bool(false)];
        let result_asc = eval_dataframe_method(&columns, "sort_by", &args_asc);

        // Sort descending
        let args_desc = vec![Value::from_string("nums".to_string()), Value::Bool(true)];
        let result_desc = eval_dataframe_method(&columns, "sort_by", &args_desc);

        prop_assert!(result_asc.is_ok() && result_desc.is_ok());

        if let (Ok(Value::DataFrame { columns: asc_cols }),
                Ok(Value::DataFrame { columns: desc_cols })) = (result_asc, result_desc) {
            let asc_values: Vec<_> = asc_cols[0].values.iter().collect();
            let desc_values: Vec<_> = desc_cols[0].values.iter().rev().collect();
            prop_assert_eq!(asc_values, desc_values, "Descending should be reverse of ascending");
        }
    }

    /// Property 4: Sort preserves multiset (all values still present)
    /// Invariant: multiset(sorted) == multiset(original)
    #[test]
    fn prop_sort_preserves_multiset(
        values in prop::collection::vec(any::<i64>(), 1..30)
    ) {
        let columns = vec![DataFrameColumn {
            name: "vals".to_string(),
            values: values.clone().into_iter().map(Value::Integer).collect(),
        }];

        let args = vec![Value::from_string("vals".to_string())];
        let result = eval_dataframe_method(&columns, "sort_by", &args);

        prop_assert!(result.is_ok());
        if let Ok(Value::DataFrame { columns: sorted_cols }) = result {
            let mut original_sorted = values;
            original_sorted.sort_unstable();

            let result_values: Vec<i64> = sorted_cols[0]
                .values
                .iter()
                .filter_map(|v| if let Value::Integer(i) = v { Some(*i) } else { None })
                .collect();

            prop_assert_eq!(result_values, original_sorted, "Sort must preserve all values");
        }
    }

    /// Property 5: Sort is stable (preserves row integrity)
    /// Invariant: Rows with same sort key maintain relative order
    #[test]
    fn prop_sort_preserves_row_integrity(
        num_rows in 5..30usize
    ) {
        // Create DataFrame with row IDs: sort_col has duplicates, id is unique
        let columns = vec![
            DataFrameColumn {
                name: "sort_col".to_string(),
                values: (0..num_rows).map(|i| Value::Integer((i % 3) as i64)).collect(),
            },
            DataFrameColumn {
                name: "id".to_string(),
                values: (0..num_rows).map(|i| Value::Integer(i as i64)).collect(),
            },
        ];

        let args = vec![Value::from_string("sort_col".to_string())];
        let result = eval_dataframe_method(&columns, "sort_by", &args);

        prop_assert!(result.is_ok());
        if let Ok(Value::DataFrame { columns: sorted_cols }) = result {
            // Verify each row's integrity: IDs should move with their sort_col values
            prop_assert_eq!(sorted_cols.len(), 2);
            prop_assert_eq!(sorted_cols[0].values.len(), sorted_cols[1].values.len());
        }
    }

    /// Property 6: Empty DataFrame sort returns empty
    /// Invariant: sort(empty) = empty
    #[test]
    fn prop_sort_empty_stays_empty(_x in 0..1000usize) {
        let columns: Vec<DataFrameColumn> = vec![];
        let args = vec![Value::from_string("any".to_string())];

        let result = eval_dataframe_method(&columns, "sort_by", &args);

        // Should error on empty DataFrame (no columns to sort)
        prop_assert!(result.is_err());
    }

    /// Property 7: Single-row DataFrame is already sorted
    /// Invariant: sort(single_row) = single_row
    #[test]
    fn prop_sort_single_row_unchanged(
        value in any::<i64>()
    ) {
        let columns = vec![DataFrameColumn {
            name: "val".to_string(),
            values: vec![Value::Integer(value)],
        }];

        let args = vec![Value::from_string("val".to_string())];
        let result = eval_dataframe_method(&columns, "sort_by", &args);

        prop_assert!(result.is_ok());
        if let Ok(Value::DataFrame { columns: sorted_cols }) = result {
            prop_assert_eq!(sorted_cols[0].values.len(), 1);
            prop_assert_eq!(sorted_cols[0].values[0].clone(), Value::Integer(value));
        }
    }

    /// Property 8: Sort with invalid column name fails
    /// Invariant: sort_by("nonexistent") → Err
    #[test]
    fn prop_sort_invalid_column_fails(
        values in prop::collection::vec(any::<i64>(), 1..10)
    ) {
        let columns = vec![DataFrameColumn {
            name: "real_col".to_string(),
            values: values.into_iter().map(Value::Integer).collect(),
        }];

        let args = vec![Value::from_string("nonexistent".to_string())];
        let result = eval_dataframe_method(&columns, "sort_by", &args);

        prop_assert!(result.is_err());
        if let Err(e) = result {
            prop_assert!(e.to_string().contains("not found"));
        }
    }

    /// Property 9: Sort idempotent (sorting twice = sorting once)
    /// Invariant: sort(sort(df)) = sort(df)
    #[test]
    fn prop_sort_idempotent(
        values in prop::collection::vec(any::<i64>(), 1..20)
    ) {
        let columns = vec![DataFrameColumn {
            name: "nums".to_string(),
            values: values.into_iter().map(Value::Integer).collect(),
        }];

        let args = vec![Value::from_string("nums".to_string())];

        // First sort
        let result1 = eval_dataframe_method(&columns, "sort_by", &args);
        prop_assert!(result1.is_ok());

        if let Ok(Value::DataFrame { columns: sorted_once }) = result1 {
            // Second sort on already sorted data
            let result2 = eval_dataframe_method(&sorted_once, "sort_by", &args);
            prop_assert!(result2.is_ok());

            if let Ok(Value::DataFrame { columns: sorted_twice }) = result2 {
                // Values should be identical
                prop_assert_eq!(
                    &sorted_once[0].values,
                    &sorted_twice[0].values,
                    "Sort should be idempotent"
                );
            }
        }
    }

    /// Property 10: Sort handles mixed numeric types correctly
    /// Invariant: Integer and Float values sort together correctly
    #[test]
    fn prop_sort_mixed_numeric_types(
        int_count in 1..10usize,
        float_count in 1..10usize
    ) {
        let mut values: Vec<Value> = Vec::new();
        for i in 0..int_count {
            values.push(Value::Integer(i as i64));
        }
        for i in 0..float_count {
            values.push(Value::Float(i as f64 + 0.5));
        }

        let columns = vec![DataFrameColumn {
            name: "mixed".to_string(),
            values,
        }];

        let args = vec![Value::from_string("mixed".to_string())];
        let result = eval_dataframe_method(&columns, "sort_by", &args);

        prop_assert!(result.is_ok());
        if let Ok(Value::DataFrame { columns: sorted_cols }) = result {
            // Just verify it completes without panic
            prop_assert_eq!(sorted_cols[0].values.len(), int_count + float_count);
        }
    }
}

#[cfg(test)]
mod unit_tests {
    use super::*;

    /// DF-004: Basic ascending sort
    #[test]
    fn test_df004_sort_ascending_basic() {
        let columns = vec![DataFrameColumn {
            name: "nums".to_string(),
            values: vec![
                Value::Integer(5),
                Value::Integer(2),
                Value::Integer(8),
                Value::Integer(1),
            ],
        }];

        let args = vec![Value::from_string("nums".to_string())];
        let result = eval_dataframe_method(&columns, "sort_by", &args);

        assert!(result.is_ok());
        if let Ok(Value::DataFrame { columns: sorted }) = result {
            assert_eq!(sorted[0].values[0], Value::Integer(1));
            assert_eq!(sorted[0].values[1], Value::Integer(2));
            assert_eq!(sorted[0].values[2], Value::Integer(5));
            assert_eq!(sorted[0].values[3], Value::Integer(8));
        } else {
            panic!("Expected DataFrame");
        }
    }

    /// DF-004: Descending sort
    #[test]
    fn test_df004_sort_descending_basic() {
        let columns = vec![DataFrameColumn {
            name: "nums".to_string(),
            values: vec![
                Value::Integer(2),
                Value::Integer(8),
                Value::Integer(1),
                Value::Integer(5),
            ],
        }];

        let args = vec![Value::from_string("nums".to_string()), Value::Bool(true)];
        let result = eval_dataframe_method(&columns, "sort_by", &args);

        assert!(result.is_ok());
        if let Ok(Value::DataFrame { columns: sorted }) = result {
            assert_eq!(sorted[0].values[0], Value::Integer(8));
            assert_eq!(sorted[0].values[1], Value::Integer(5));
            assert_eq!(sorted[0].values[2], Value::Integer(2));
            assert_eq!(sorted[0].values[3], Value::Integer(1));
        }
    }

    /// DF-004: Sort preserves multiple columns
    #[test]
    fn test_df004_sort_multi_column_integrity() {
        let columns = vec![
            DataFrameColumn {
                name: "sort_key".to_string(),
                values: vec![Value::Integer(3), Value::Integer(1), Value::Integer(2)],
            },
            DataFrameColumn {
                name: "data".to_string(),
                values: vec![
                    Value::from_string("C".to_string()),
                    Value::from_string("A".to_string()),
                    Value::from_string("B".to_string()),
                ],
            },
        ];

        let args = vec![Value::from_string("sort_key".to_string())];
        let result = eval_dataframe_method(&columns, "sort_by", &args);

        assert!(result.is_ok());
        if let Ok(Value::DataFrame { columns: sorted }) = result {
            assert_eq!(sorted[0].values[0], Value::Integer(1));
            assert_eq!(sorted[1].values[0], Value::from_string("A".to_string()));
            assert_eq!(sorted[0].values[1], Value::Integer(2));
            assert_eq!(sorted[1].values[1], Value::from_string("B".to_string()));
            assert_eq!(sorted[0].values[2], Value::Integer(3));
            assert_eq!(sorted[1].values[2], Value::from_string("C".to_string()));
        }
    }
}