1use crate::Result;
12use nexcore_dataframe::{Agg, DataFrame};
13
14pub fn transform_humanization_gate(df: DataFrame, threshold: f64) -> Result<DataFrame> {
23 let threshold = threshold.clamp(0.0, 1.0);
24 tracing::info!(
25 stage = "humanization-gate",
26 threshold = threshold,
27 "Applying anti-regression filter for AI-generated text"
28 );
29
30 let filtered = df.filter_by("prob_generated", |v| {
32 v.as_f64().is_some_and(|p| p < threshold)
33 })?;
34
35 Ok(filtered)
36}
37
38pub fn transform_phrasing_discovery(df: DataFrame) -> Result<DataFrame> {
48 tracing::info!(
49 stage = "phrasing-discovery",
50 "Searching for natural phrasing alternatives"
51 );
52
53 let aggregated = df
55 .group_by(&["id"])?
56 .agg(&[Agg::First("text".into()), Agg::Min("prob_generated".into())])?;
57
58 Ok(aggregated)
59}
60
/// Placeholder rewrite step: returns `text` prefixed with `"Refactored: "`.
///
/// No real rewriting happens here; the `allow` reason below marks this as a
/// stand-in for a future LLM integration.
#[allow(dead_code, reason = "placeholder for LLM integration")]
pub fn humanize_text(text: &str) -> String {
    ["Refactored: ", text].concat()
}
68
#[cfg(test)]
mod tests {
    use super::*;
    use nexcore_dataframe::Column;

    /// Builds the shared four-row fixture: ids `a` and `b` appear twice each,
    /// once with a low and once with a high `prob_generated` score.
    fn humanize_frame() -> crate::Result<DataFrame> {
        Ok(DataFrame::new(vec![
            Column::from_strs("id", &["a", "a", "b", "b"]),
            Column::from_strs(
                "text",
                &["hello world", "greetings earth", "foo bar", "baz qux"],
            ),
            Column::from_f64s("prob_generated", vec![0.3, 0.8, 0.1, 0.9]),
        ])?)
    }

    #[test]
    fn humanization_gate_filters_above_threshold() -> crate::Result<()> {
        let df = transform_humanization_gate(humanize_frame()?, 0.5)?;
        // Only the 0.3 and 0.1 rows are strictly below 0.5.
        assert_eq!(df.height(), 2);
        Ok(())
    }

    #[test]
    fn humanization_gate_clamps_threshold() -> crate::Result<()> {
        // 2.0 clamps to 1.0, and every fixture score is below 1.0.
        let df = transform_humanization_gate(humanize_frame()?, 2.0)?;
        assert_eq!(df.height(), 4);

        // -1.0 clamps to 0.0, and no fixture score is strictly below 0.0.
        let df = transform_humanization_gate(humanize_frame()?, -1.0)?;
        assert_eq!(df.height(), 0);
        Ok(())
    }

    #[test]
    fn phrasing_discovery_aggregates_by_id() -> crate::Result<()> {
        use nexcore_dataframe::Scalar;
        let df = transform_phrasing_discovery(humanize_frame()?)?;
        // Two distinct ids in the fixture, so two aggregated rows.
        assert_eq!(df.height(), 2);

        let ids = df.column("id")?;
        let mins = df.column("prob_generated_min")?;
        // Looked up only to assert that the aggregated text column exists.
        df.column("text_first")?;

        let mut found_a = false;
        let mut found_b = false;
        for i in 0..df.height() {
            let id = ids.get(i).as_ref().map(|s| s.to_string());
            let expected = match id.as_deref() {
                Some("a") => {
                    found_a = true;
                    0.3_f64
                }
                Some("b") => {
                    found_b = true;
                    0.1_f64
                }
                // Row order is not assumed; unrelated ids are ignored.
                _ => continue,
            };

            // Fail loudly when the minimum is missing or has the wrong scalar
            // kind; otherwise the numeric check below would be skipped and the
            // test could pass without verifying anything.
            let Some(Scalar::Float64(v)) = mins.get(i) else {
                panic!("prob_generated_min at row {i} must be a Float64 scalar");
            };
            assert!(
                (v - expected).abs() < 1e-9,
                "id={} min prob_generated must be {expected}, got {v}",
                id.as_deref().unwrap_or_default()
            );
        }
        assert!(found_a, "id=a must appear in result");
        assert!(found_b, "id=b must appear in result");
        Ok(())
    }

    #[test]
    fn humanize_text_stub_is_non_empty_for_non_empty_input() {
        let output = humanize_text("test input");
        // The stub's exact wording may change; pin only the observable contract.
        assert!(!output.is_empty(), "stub must return non-empty string");
        assert_ne!(
            output, "test input",
            "stub must transform input, not echo it"
        );
    }
}