1use crate::{
4 data::{FormatType, Extraction},
5 exceptions::{LangExtractError, LangExtractResult},
6 ExtractConfig
7};
8use serde_json::Value;
9use std::fs;
10use std::path::Path;
11use uuid::Uuid;
12use regex::Regex;
13
/// Options that steer validation and post-processing of model output.
#[derive(Clone, Debug)]
pub struct ValidationConfig {
    /// Schema-validation toggle.
    /// NOTE(review): not consulted by the code visible in this part of the file — confirm where it is read.
    pub enable_schema_validation: bool,
    /// When `true`, extracted text is run through the type coercer during validation.
    pub enable_type_coercion: bool,
    /// When `true`, every expected field without an extraction is reported as a validation error.
    pub require_all_fields: bool,
    /// When `true`, raw model responses are written to `raw_outputs_dir`.
    pub save_raw_outputs: bool,
    /// Directory that receives the saved raw responses.
    pub raw_outputs_dir: String,
    /// Quality threshold.
    /// NOTE(review): not consulted by the code visible in this part of the file — confirm range and meaning.
    pub quality_threshold: f32,
}

impl Default for ValidationConfig {
    /// Baseline configuration: schema validation, type coercion and raw-output saving are on,
    /// missing fields are tolerated, raw outputs go to `./raw_outputs`, and the quality
    /// threshold is `0.0`.
    fn default() -> Self {
        let raw_outputs_dir = String::from("./raw_outputs");
        Self {
            raw_outputs_dir,
            quality_threshold: 0.0,
            require_all_fields: false,
            enable_schema_validation: true,
            enable_type_coercion: true,
            save_raw_outputs: true,
        }
    }
}
43
44#[derive(Debug, Clone)]
46pub struct ValidationResult {
47 pub is_valid: bool,
49 pub errors: Vec<ValidationError>,
51 pub warnings: Vec<ValidationWarning>,
53 pub corrected_data: Option<Value>,
55 pub raw_output_file: Option<String>,
57 pub coercion_summary: Option<CoercionSummary>,
59}
60
/// A single problem that makes a validation result invalid.
#[derive(Clone, Debug)]
pub struct ValidationError {
    /// Human-readable description of the problem.
    pub message: String,
    /// Field the problem refers to, when it can be attributed to one.
    pub field_path: Option<String>,
    /// Description of what was expected, when available.
    pub expected: Option<String>,
    /// Description of what was found instead, when available.
    pub actual: Option<String>,
}
73
/// A non-fatal observation made while validating extractions.
#[derive(Clone, Debug)]
pub struct ValidationWarning {
    /// Human-readable description of the observation.
    pub message: String,
    /// Field the observation refers to, when it can be attributed to one.
    pub field_path: Option<String>,
}
82
83#[derive(Debug, Clone)]
85pub struct CoercionSummary {
86 pub successful_coercions: usize,
88 pub failed_coercions: usize,
90 pub coercion_details: Vec<CoercionDetail>,
92}
93
94#[derive(Debug, Clone)]
96pub struct CoercionDetail {
97 pub field_name: String,
99 pub original_value: String,
101 pub coerced_value: Option<Value>,
103 pub target_type: CoercionTargetType,
105 pub success: bool,
107 pub error_message: Option<String>,
109}
110
/// The kinds of typed values the coercer can recognise in extracted text.
#[derive(Clone, Debug, PartialEq)]
pub enum CoercionTargetType {
    /// Whole number, emitted as a JSON integer.
    Integer,
    /// Decimal or exponent-notation number, emitted as a JSON float.
    Float,
    /// Truthy/falsy word such as `yes` or `off`, emitted as a JSON boolean.
    Boolean,
    /// Monetary amount, emitted as a JSON number scaled by any thousand/million/billion suffix.
    Currency,
    /// Value ending in `%`, emitted as a fraction (the number divided by 100).
    Percentage,
    /// E-mail address, emitted as an object tagged `"type": "email"`.
    Email,
    /// Ten-digit phone number, emitted as an object tagged `"type": "phone"`.
    PhoneNumber,
    /// Calendar date, emitted as an object tagged `"type": "date"`.
    Date,
    /// HTTP(S) URL, emitted as an object tagged `"type": "url"`.
    Url,
}
124
125pub struct TypeCoercer {
127 enable_coercion: bool,
128 integer_regex: Regex,
130 float_regex: Regex,
131 currency_regex: Regex,
132 percentage_regex: Regex,
133 email_regex: Regex,
134 phone_regex: Regex,
135 date_regex: Regex,
136 url_regex: Regex,
137}
138
139impl TypeCoercer {
140 pub fn new(enable_coercion: bool) -> Self {
142 Self {
143 enable_coercion,
144 integer_regex: Regex::new(r"^[+-]?\d+$").unwrap(),
145 float_regex: Regex::new(r"^[+-]?\d*\.?\d+([eE][+-]?\d+)?$").unwrap(),
146 currency_regex: Regex::new(r"^\$+([\d,]+(?:\.\d{1,2})?)\s*(?:million|M|billion|B|thousand|K)?$|^([\d,]+(?:\.\d{1,2})?)\s*(?:million|M|billion|B|thousand|K)$").unwrap(),
147 percentage_regex: Regex::new(r"^(\d*\.?\d+)%$").unwrap(),
148 email_regex: Regex::new(r"^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$").unwrap(),
149 phone_regex: Regex::new(r"^\(?([0-9]{3})\)?[-. ]?([0-9]{3})[-. ]?([0-9]{4})$").unwrap(),
150 date_regex: Regex::new(r"^\d{4}-\d{2}-\d{2}|\d{1,2}\/\d{1,2}\/\d{4}|\w+ \d{1,2}, \d{4}$").unwrap(),
151 url_regex: Regex::new(r"^https?://[^\s/$.?#].[^\s]*$").unwrap(),
152 }
153 }
154
155 pub fn coerce_value(&self, field_name: &str, value: &str) -> CoercionDetail {
157 if !self.enable_coercion {
158 return CoercionDetail {
159 field_name: field_name.to_string(),
160 original_value: value.to_string(),
161 coerced_value: None,
162 target_type: CoercionTargetType::Integer, success: false,
164 error_message: Some("Type coercion disabled".to_string()),
165 };
166 }
167
168 let trimmed_value = value.trim();
169
170 if let Some(result) = self.try_coerce_percentage(field_name, trimmed_value) {
174 return result;
175 }
176
177 if let Some(result) = self.try_coerce_email(field_name, trimmed_value) {
179 return result;
180 }
181
182 if let Some(result) = self.try_coerce_phone(field_name, trimmed_value) {
184 return result;
185 }
186
187 if let Some(result) = self.try_coerce_url(field_name, trimmed_value) {
189 return result;
190 }
191
192 if let Some(result) = self.try_coerce_date(field_name, trimmed_value) {
194 return result;
195 }
196
197 if let Some(result) = self.try_coerce_currency(field_name, trimmed_value) {
199 return result;
200 }
201
202 if let Some(result) = self.try_coerce_integer(field_name, trimmed_value) {
204 return result;
205 }
206
207 if let Some(result) = self.try_coerce_float(field_name, trimmed_value) {
209 return result;
210 }
211
212 if let Some(result) = self.try_coerce_boolean(field_name, trimmed_value) {
214 return result;
215 }
216
217 CoercionDetail {
219 field_name: field_name.to_string(),
220 original_value: value.to_string(),
221 coerced_value: None,
222 target_type: CoercionTargetType::Integer, success: false,
224 error_message: Some("No applicable coercion found".to_string()),
225 }
226 }
227
228 fn try_coerce_integer(&self, field_name: &str, value: &str) -> Option<CoercionDetail> {
229 if self.integer_regex.is_match(value) {
230 match value.parse::<i64>() {
231 Ok(num) => Some(CoercionDetail {
232 field_name: field_name.to_string(),
233 original_value: value.to_string(),
234 coerced_value: Some(Value::Number(serde_json::Number::from(num))),
235 target_type: CoercionTargetType::Integer,
236 success: true,
237 error_message: None,
238 }),
239 Err(e) => Some(CoercionDetail {
240 field_name: field_name.to_string(),
241 original_value: value.to_string(),
242 coerced_value: None,
243 target_type: CoercionTargetType::Integer,
244 success: false,
245 error_message: Some(format!("Integer parse error: {}", e)),
246 }),
247 }
248 } else {
249 None
250 }
251 }
252
253 fn try_coerce_float(&self, field_name: &str, value: &str) -> Option<CoercionDetail> {
254 if self.float_regex.is_match(value) {
255 match value.parse::<f64>() {
256 Ok(num) => Some(CoercionDetail {
257 field_name: field_name.to_string(),
258 original_value: value.to_string(),
259 coerced_value: Some(Value::Number(serde_json::Number::from_f64(num).unwrap_or_else(|| serde_json::Number::from(0)))),
260 target_type: CoercionTargetType::Float,
261 success: true,
262 error_message: None,
263 }),
264 Err(e) => Some(CoercionDetail {
265 field_name: field_name.to_string(),
266 original_value: value.to_string(),
267 coerced_value: None,
268 target_type: CoercionTargetType::Float,
269 success: false,
270 error_message: Some(format!("Float parse error: {}", e)),
271 }),
272 }
273 } else {
274 None
275 }
276 }
277
278 fn try_coerce_boolean(&self, field_name: &str, value: &str) -> Option<CoercionDetail> {
279 let lower_value = value.to_lowercase();
280 match lower_value.as_str() {
281 "true" | "yes" | "y" | "on" | "enabled" => Some(CoercionDetail {
282 field_name: field_name.to_string(),
283 original_value: value.to_string(),
284 coerced_value: Some(Value::Bool(true)),
285 target_type: CoercionTargetType::Boolean,
286 success: true,
287 error_message: None,
288 }),
289 "false" | "no" | "n" | "off" | "disabled" => Some(CoercionDetail {
290 field_name: field_name.to_string(),
291 original_value: value.to_string(),
292 coerced_value: Some(Value::Bool(false)),
293 target_type: CoercionTargetType::Boolean,
294 success: true,
295 error_message: None,
296 }),
297 _ => None,
298 }
299 }
300
301 fn try_coerce_currency(&self, field_name: &str, value: &str) -> Option<CoercionDetail> {
302 if let Some(captures) = self.currency_regex.captures(value) {
303 let amount_str = captures.get(1).or_else(|| captures.get(2))?;
305 let amount_clean = amount_str.as_str().replace(",", "");
306 if let Ok(mut amount) = amount_clean.parse::<f64>() {
307 let lower_value = value.to_lowercase();
309 if lower_value.contains("million") || lower_value.contains("m") {
310 amount *= 1_000_000.0;
311 } else if lower_value.contains("billion") || lower_value.contains("b") {
312 amount *= 1_000_000_000.0;
313 } else if lower_value.contains("thousand") || lower_value.contains("k") {
314 amount *= 1_000.0;
315 }
316
317 return Some(CoercionDetail {
318 field_name: field_name.to_string(),
319 original_value: value.to_string(),
320 coerced_value: Some(Value::Number(serde_json::Number::from_f64(amount).unwrap_or_else(|| serde_json::Number::from(0)))),
321 target_type: CoercionTargetType::Currency,
322 success: true,
323 error_message: None,
324 });
325 }
326 }
327 None
328 }
329
330 fn try_coerce_percentage(&self, field_name: &str, value: &str) -> Option<CoercionDetail> {
331 if let Some(captures) = self.percentage_regex.captures(value) {
332 if let Some(percent_str) = captures.get(1) {
333 if let Ok(percent) = percent_str.as_str().parse::<f64>() {
334 return Some(CoercionDetail {
335 field_name: field_name.to_string(),
336 original_value: value.to_string(),
337 coerced_value: Some(Value::Number(serde_json::Number::from_f64(percent / 100.0).unwrap_or_else(|| serde_json::Number::from(0)))),
338 target_type: CoercionTargetType::Percentage,
339 success: true,
340 error_message: None,
341 });
342 }
343 }
344 }
345 None
346 }
347
348 fn try_coerce_email(&self, field_name: &str, value: &str) -> Option<CoercionDetail> {
349 if self.email_regex.is_match(value) {
350 Some(CoercionDetail {
351 field_name: field_name.to_string(),
352 original_value: value.to_string(),
353 coerced_value: Some(Value::Object({
354 let mut obj = serde_json::Map::new();
355 obj.insert("email".to_string(), Value::String(value.to_string()));
356 obj.insert("type".to_string(), Value::String("email".to_string()));
357 obj
358 })),
359 target_type: CoercionTargetType::Email,
360 success: true,
361 error_message: None,
362 })
363 } else {
364 None
365 }
366 }
367
368 fn try_coerce_phone(&self, field_name: &str, value: &str) -> Option<CoercionDetail> {
369 if let Some(captures) = self.phone_regex.captures(value) {
370 let area = captures.get(1)?.as_str();
371 let exchange = captures.get(2)?.as_str();
372 let number = captures.get(3)?.as_str();
373 let formatted = format!("({}) {}-{}", area, exchange, number);
374
375 Some(CoercionDetail {
376 field_name: field_name.to_string(),
377 original_value: value.to_string(),
378 coerced_value: Some(Value::Object({
379 let mut obj = serde_json::Map::new();
380 obj.insert("phone".to_string(), Value::String(formatted));
381 obj.insert("area_code".to_string(), Value::String(area.to_string()));
382 obj.insert("type".to_string(), Value::String("phone".to_string()));
383 obj
384 })),
385 target_type: CoercionTargetType::PhoneNumber,
386 success: true,
387 error_message: None,
388 })
389 } else {
390 None
391 }
392 }
393
394 fn try_coerce_date(&self, field_name: &str, value: &str) -> Option<CoercionDetail> {
395 if self.date_regex.is_match(value) {
396 Some(CoercionDetail {
397 field_name: field_name.to_string(),
398 original_value: value.to_string(),
399 coerced_value: Some(Value::Object({
400 let mut obj = serde_json::Map::new();
401 obj.insert("date".to_string(), Value::String(value.to_string()));
402 obj.insert("type".to_string(), Value::String("date".to_string()));
403 obj
404 })),
405 target_type: CoercionTargetType::Date,
406 success: true,
407 error_message: None,
408 })
409 } else {
410 None
411 }
412 }
413
414 fn try_coerce_url(&self, field_name: &str, value: &str) -> Option<CoercionDetail> {
415 if self.url_regex.is_match(value) {
416 Some(CoercionDetail {
417 field_name: field_name.to_string(),
418 original_value: value.to_string(),
419 coerced_value: Some(Value::Object({
420 let mut obj = serde_json::Map::new();
421 obj.insert("url".to_string(), Value::String(value.to_string()));
422 obj.insert("type".to_string(), Value::String("url".to_string()));
423 obj
424 })),
425 target_type: CoercionTargetType::Url,
426 success: true,
427 error_message: None,
428 })
429 } else {
430 None
431 }
432 }
433}
434
435pub struct Resolver {
437 fence_output: bool,
439 format_type: FormatType,
441 validation_config: ValidationConfig,
443 type_coercer: TypeCoercer,
445}
446
447impl Resolver {
448 pub fn new(config: &ExtractConfig, fence_output: bool) -> LangExtractResult<Self> {
450 let validation_config = ValidationConfig {
451 save_raw_outputs: config.debug, ..Default::default()
453 };
454
455 if validation_config.save_raw_outputs {
457 if let Err(e) = fs::create_dir_all(&validation_config.raw_outputs_dir) {
458 log::warn!("Failed to create raw outputs directory: {}", e);
459 }
460 }
461
462 let type_coercer = TypeCoercer::new(validation_config.enable_type_coercion);
463
464 Ok(Self {
465 fence_output,
466 format_type: config.format_type,
467 validation_config,
468 type_coercer,
469 })
470 }
471
472 pub fn with_validation_config(
474 config: &ExtractConfig,
475 fence_output: bool,
476 validation_config: ValidationConfig
477 ) -> LangExtractResult<Self> {
478 if validation_config.save_raw_outputs {
480 if let Err(e) = fs::create_dir_all(&validation_config.raw_outputs_dir) {
481 log::warn!("Failed to create raw outputs directory: {}", e);
482 }
483 }
484
485 let type_coercer = TypeCoercer::new(validation_config.enable_type_coercion);
486
487 Ok(Self {
488 fence_output,
489 format_type: config.format_type,
490 validation_config,
491 type_coercer,
492 })
493 }
494
495 pub fn fence_output(&self) -> bool {
497 self.fence_output
498 }
499
500 pub fn save_raw_output(&self, raw_output: &str, metadata: Option<&str>) -> LangExtractResult<String> {
502 if !self.validation_config.save_raw_outputs {
503 return Err(LangExtractError::configuration("Raw output saving is disabled"));
504 }
505
506 let output_dir = Path::new(&self.validation_config.raw_outputs_dir);
508 if !output_dir.exists() {
509 fs::create_dir_all(output_dir).map_err(|e| {
510 LangExtractError::IoError(e)
511 })?;
512 }
513
514 let timestamp = chrono::Utc::now().format("%Y%m%d_%H%M%S").to_string();
515 let unique_id = Uuid::new_v4().to_string()[..8].to_string();
516 let filename = format!("raw_output_{}_{}.txt", timestamp, unique_id);
517 let filepath = output_dir.join(&filename);
518
519 let mut content = String::new();
520 content.push_str(&format!("=== Raw Model Output ===\n"));
521 content.push_str(&format!("Timestamp: {}\n", chrono::Utc::now().to_rfc3339()));
522 if let Some(meta) = metadata {
523 content.push_str(&format!("Metadata: {}\n", meta));
524 }
525 content.push_str(&format!("Format: {:?}\n", self.format_type));
526 content.push_str(&format!("Content Length: {} chars\n", raw_output.len()));
527 content.push_str(&format!("=== Output Content ===\n"));
528 content.push_str(raw_output);
529 content.push_str("\n=== End Output ===\n");
530
531 fs::write(&filepath, content).map_err(|e| {
532 LangExtractError::IoError(e)
533 })?;
534
535 let path_str = filepath.to_string_lossy().to_string();
536 log::info!("Saved raw output to: {}", path_str);
537 Ok(path_str)
538 }
539
540 #[tracing::instrument(skip_all, fields(response_len = raw_response.len(), num_expected_fields = expected_fields.len()))]
542 pub fn validate_and_parse(&self, raw_response: &str, expected_fields: &[String]) -> LangExtractResult<(Vec<Extraction>, ValidationResult)> {
543 let raw_file_path = if self.validation_config.save_raw_outputs {
545 match self.save_raw_output(raw_response, Some("validation_parse")) {
546 Ok(path) => {
547 log::debug!("Raw output saved to: {}", path);
548 Some(path)
549 }
550 Err(e) => {
551 log::warn!("Failed to save raw output: {}", e);
552 None
553 }
554 }
555 } else {
556 None
557 };
558
559 log::debug!("Parsing model response...");
561 let parse_result = self.parse_response_with_repair(raw_response, expected_fields);
562
563 let mut validation_result = match &parse_result {
565 Ok(extractions) => {
566 log::debug!("Successfully parsed {} potential extractions", extractions.len());
567 self.validate_extractions(extractions, expected_fields)
568 }
569 Err(parse_error) => {
570 log::debug!("Failed to parse model response");
571 ValidationResult {
573 is_valid: false,
574 errors: vec![ValidationError {
575 message: format!("Failed to parse response: {}", parse_error),
576 field_path: None,
577 expected: Some("Valid JSON structure".to_string()),
578 actual: Some("Unparseable content".to_string()),
579 }],
580 warnings: vec![],
581 corrected_data: None,
582 raw_output_file: raw_file_path.clone(), coercion_summary: None,
584 }
585 }
586 };
587
588 if validation_result.raw_output_file.is_none() {
590 validation_result.raw_output_file = raw_file_path.clone();
591 }
592
593 match parse_result {
595 Ok(extractions) => Ok((extractions, validation_result)),
596 Err(e) => {
597 match &validation_result.raw_output_file {
599 Some(path) => {
600 log::warn!("Parse failed but raw data saved to: {}", path);
601 log::warn!("Parse failed - check raw output at: {}", path);
602 }
603 None => {
604 log::warn!("Parse failed and no raw data was saved");
605 log::warn!("Parse failed and raw data could not be saved");
606 }
607 }
608 Err(e)
609 }
610 }
611 }
612
613 fn clean_response(&self, response: &str) -> String {
615 let mut cleaned = response.to_string();
616
617 cleaned = cleaned.replace("```json", "");
620 cleaned = cleaned.replace("```yaml", "");
621 cleaned = cleaned.replace("```python", "");
622 cleaned = cleaned.replace("```javascript", "");
623 cleaned = cleaned.replace("```rust", "");
624 cleaned = cleaned.replace("```", "");
625
626 cleaned.trim().to_string()
628 }
629
630 fn detect_and_repair_malformed_json(&self, json: &serde_json::Value, expected_fields: &[String]) -> Option<serde_json::Value> {
632 if let Some(obj) = json.as_object() {
634 if obj.len() == 1 {
636 if let Some((single_key, single_value)) = obj.iter().next() {
637 if let Some(extraction_text) = single_value.as_str() {
638 let mut found_fields = Vec::new();
640
641 for field in expected_fields {
642 let patterns = [
644 format!(r"(?i){}[:\-=]\s*([^\n\r,]*)", regex::escape(field)),
645 format!(r"(?i){}\s*[:\-=]\s*([^\n\r,]*)", regex::escape(field)),
646 format!(r"(?i){}[:\-=]\s*([^,\n\r]+)", regex::escape(field)),
647 ];
648
649 for pattern in &patterns {
650 if let Ok(regex) = Regex::new(pattern) {
651 if regex.is_match(extraction_text) {
652 found_fields.push(field.clone());
653 break; }
655 }
656 }
657 }
658
659 if found_fields.len() > 1 {
662 log::debug!("Detected malformed JSON: {} extraction classes found in single extraction_text '{}'",
663 found_fields.len(), single_key);
664
665 let mut repaired_obj = serde_json::Map::new();
667
668 for field in &found_fields {
669 let patterns = [
670 format!(r"(?i){}[:\-=]\s*([^\n\r,]*)", regex::escape(field)),
671 format!(r"(?i){}\s*[:\-=]\s*([^\n\r,]*)", regex::escape(field)),
672 ];
673
674 for pattern in &patterns {
675 if let Ok(regex) = Regex::new(pattern) {
676 if let Some(captures) = regex.captures(extraction_text) {
677 if let Some(value_match) = captures.get(1) {
678 let value = value_match.as_str().trim();
679 if !value.is_empty() {
680 repaired_obj.insert(field.clone(), serde_json::Value::String(value.to_string()));
681 break;
682 }
683 }
684 }
685 }
686 }
687 }
688
689 if !repaired_obj.is_empty() {
690 log::debug!("Successfully repaired malformed JSON, extracted {} fields", repaired_obj.len());
691 return Some(serde_json::Value::Object(repaired_obj));
692 }
693 }
694 }
695 }
696 }
697 }
698
699 None }
701
702 fn parse_response_with_repair(&self, response: &str, expected_fields: &[String]) -> LangExtractResult<Vec<Extraction>> {
704 let cleaned_response = self.clean_response(response);
706 log::debug!("Cleaned response length: {} chars", cleaned_response.len());
707
708 if let Ok(json_value) = serde_json::from_str::<serde_json::Value>(&cleaned_response) {
710 log::debug!("Parsed JSON successfully");
711
712 if let Some(repaired_json) = self.detect_and_repair_malformed_json(&json_value, expected_fields) {
714 log::debug!("Applied JSON repair logic");
715 return self.parse_json_response(&repaired_json);
716 } else {
717 return self.parse_json_response(&json_value);
718 }
719 }
720
721 if let Some(json_start) = cleaned_response.find('{') {
723 if let Some(json_end) = cleaned_response.rfind('}') {
724 let json_str = &cleaned_response[json_start..=json_end];
725 if let Ok(json_value) = serde_json::from_str::<serde_json::Value>(json_str) {
726 log::debug!("Extracted and parsed JSON from wrapped content");
727
728 if let Some(repaired_json) = self.detect_and_repair_malformed_json(&json_value, expected_fields) {
730 log::debug!("Applied JSON repair logic to extracted content");
731 return self.parse_json_response(&repaired_json);
732 } else {
733 return self.parse_json_response(&json_value);
734 }
735 }
736 }
737 }
738
739 Err(LangExtractError::parsing(
740 format!("Could not parse response as JSON after cleaning: {}", cleaned_response)
741 ))
742 }
743
744
745
746 fn parse_json_response(&self, json: &serde_json::Value) -> LangExtractResult<Vec<Extraction>> {
748 let mut extractions = Vec::new();
749
750 if let Some(array) = json.as_array() {
752 for (index, item) in array.iter().enumerate() {
753 extractions.extend(self.parse_single_item(item, Some(index))?);
754 }
755 return Ok(extractions);
756 }
757
758 if let Some(obj) = json.as_object() {
760 if let Some(data_array) = obj.get("data").and_then(|v| v.as_array()) {
761 for (index, item) in data_array.iter().enumerate() {
762 extractions.extend(self.parse_single_item(item, Some(index))?);
763 }
764 return Ok(extractions);
765 }
766 if let Some(results_array) = obj.get("results").and_then(|v| v.as_array()) {
767 for (index, item) in results_array.iter().enumerate() {
768 extractions.extend(self.parse_single_item(item, Some(index))?);
769 }
770 return Ok(extractions);
771 }
772
773 extractions.extend(self.parse_single_item(json, None)?);
775 }
776
777 Ok(extractions)
778 }
779
780 fn parse_single_item(&self, item: &serde_json::Value, index: Option<usize>) -> LangExtractResult<Vec<Extraction>> {
782 let mut extractions = Vec::new();
783
784 match item {
785 Value::Object(obj) => {
786 for (key, value) in obj {
787 let extraction_text = match value {
788 Value::String(s) => s.clone(),
789 Value::Number(n) => n.to_string(),
790 Value::Bool(b) => b.to_string(),
791 Value::Array(_) | Value::Object(_) => value.to_string(),
792 Value::Null => continue,
793 };
794
795 let mut extraction = Extraction::new(key.clone(), extraction_text);
796 if let Some(idx) = index {
797 extraction.group_index = Some(idx);
798 }
799 extractions.push(extraction);
800 }
801 }
802 Value::String(s) => {
803 let extraction_class = if let Some(idx) = index {
804 format!("item_{}", idx)
805 } else {
806 "text".to_string()
807 };
808 extractions.push(Extraction::new(extraction_class, s.clone()));
809 }
810 _ => {
811 return Err(LangExtractError::parsing(
812 format!("Unsupported item type: {:?}", item)
813 ));
814 }
815 }
816
817 Ok(extractions)
818 }
819
820 fn validate_extractions(&self, extractions: &[Extraction], expected_fields: &[String]) -> ValidationResult {
822 let mut errors = Vec::new();
823 let mut warnings = Vec::new();
824 let mut is_valid = true;
825 let mut coercion_details = Vec::new();
826
827 if self.validation_config.require_all_fields {
829 let extraction_classes: std::collections::HashSet<_> =
830 extractions.iter().map(|e| &e.extraction_class).collect();
831
832 for expected_field in expected_fields {
833 if !extraction_classes.contains(expected_field) {
834 errors.push(ValidationError {
835 message: format!("Required field '{}' is missing", expected_field),
836 field_path: Some(expected_field.clone()),
837 expected: Some("Present".to_string()),
838 actual: Some("Missing".to_string()),
839 });
840 is_valid = false;
841 }
842 }
843 }
844
845 for extraction in extractions {
847 if extraction.extraction_text.trim().is_empty() {
849 warnings.push(ValidationWarning {
850 message: format!("Empty extraction text for field '{}'", extraction.extraction_class),
851 field_path: Some(extraction.extraction_class.clone()),
852 });
853 }
854
855 if extraction.extraction_text.len() > 1000 {
857 warnings.push(ValidationWarning {
858 message: format!("Very long extraction text ({} chars) for field '{}'",
859 extraction.extraction_text.len(), extraction.extraction_class),
860 field_path: Some(extraction.extraction_class.clone()),
861 });
862 }
863
864 if self.validation_config.enable_type_coercion {
866 let coercion_result = self.type_coercer.coerce_value(
867 &extraction.extraction_class,
868 &extraction.extraction_text
869 );
870 coercion_details.push(coercion_result);
871 }
872 }
873
874 if extractions.len() < expected_fields.len() / 2 {
876 warnings.push(ValidationWarning {
877 message: format!("Low extraction count: found {} but expected around {}",
878 extractions.len(), expected_fields.len()),
879 field_path: None,
880 });
881 }
882
883 let corrected_data = if !coercion_details.is_empty() && coercion_details.iter().any(|d| d.success) {
885 let mut corrected_obj = serde_json::Map::new();
886
887 for detail in &coercion_details {
888 if detail.success {
889 if let Some(ref coerced_value) = detail.coerced_value {
890 corrected_obj.insert(detail.field_name.clone(), coerced_value.clone());
891 }
892 } else {
893 corrected_obj.insert(detail.field_name.clone(), Value::String(detail.original_value.clone()));
895 }
896 }
897
898 Some(Value::Object(corrected_obj))
899 } else {
900 None
901 };
902
903 let coercion_summary = if !coercion_details.is_empty() {
905 let successful_coercions = coercion_details.iter().filter(|d| d.success).count();
906 let failed_coercions = coercion_details.len() - successful_coercions;
907
908 Some(CoercionSummary {
909 successful_coercions,
910 failed_coercions,
911 coercion_details,
912 })
913 } else {
914 None
915 };
916
917 ValidationResult {
918 is_valid: is_valid && errors.is_empty(),
919 errors,
920 warnings,
921 corrected_data,
922 raw_output_file: None, coercion_summary,
924 }
925 }
926}
927
928#[cfg(test)]
929mod tests {
930 use super::*;
931 use crate::ExtractConfig;
932 use std::fs;
933 use tempfile::TempDir;
934
935 fn create_test_config() -> ExtractConfig {
936 ExtractConfig {
937 debug: true,
938 ..Default::default()
939 }
940 }
941
942 fn create_test_resolver() -> Resolver {
943 let config = create_test_config();
944 Resolver::new(&config, true).unwrap()
945 }
946
947 fn create_test_resolver_with_temp_dir(temp_dir: &TempDir) -> Resolver {
948 let config = create_test_config();
949 let validation_config = ValidationConfig {
950 save_raw_outputs: true,
951 raw_outputs_dir: temp_dir.path().to_string_lossy().to_string(),
952 ..Default::default()
953 };
954 Resolver::with_validation_config(&config, true, validation_config).unwrap()
955 }
956
957 #[test]
958 fn test_validation_config_default() {
959 let config = ValidationConfig::default();
960 assert!(config.enable_schema_validation);
961 assert!(config.enable_type_coercion);
962 assert!(!config.require_all_fields);
963 assert!(config.save_raw_outputs);
964 assert_eq!(config.raw_outputs_dir, "./raw_outputs");
965 assert_eq!(config.quality_threshold, 0.0);
966 }
967
968 #[test]
969 fn test_raw_output_saving() {
970 let temp_dir = TempDir::new().unwrap();
971 let resolver = create_test_resolver_with_temp_dir(&temp_dir);
972
973 let test_output = r#"{"person": "John Doe", "age": "30"}"#;
974 let result = resolver.save_raw_output(test_output, Some("test_metadata"));
975
976 assert!(result.is_ok());
977 let file_path = result.unwrap();
978 assert!(std::path::Path::new(&file_path).exists());
979
980 let content = fs::read_to_string(&file_path).unwrap();
981 assert!(content.contains("Raw Model Output"));
982 assert!(content.contains("test_metadata"));
983 assert!(content.contains(test_output));
984 }
985
986 #[test]
987 fn test_parse_valid_json() {
988 let resolver = create_test_resolver();
989 let json_response = r#"[{"person": "John Doe", "age": "30"}]"#;
990 let expected_fields = vec!["person".to_string(), "age".to_string()];
991
992 let result = resolver.parse_response_with_repair(json_response, &expected_fields);
993 assert!(result.is_ok());
994
995 let extractions = result.unwrap();
996 assert_eq!(extractions.len(), 2);
997
998 let classes: std::collections::HashSet<_> = extractions.iter()
1000 .map(|e| e.extraction_class.as_str()).collect();
1001 assert!(classes.contains("person"));
1002 assert!(classes.contains("age"));
1003
1004 let person_extraction = extractions.iter().find(|e| e.extraction_class == "person").unwrap();
1006 assert_eq!(person_extraction.extraction_text, "John Doe");
1007 let age_extraction = extractions.iter().find(|e| e.extraction_class == "age").unwrap();
1008 assert_eq!(age_extraction.extraction_text, "30");
1009 }
1010
1011 #[test]
1012 fn test_parse_wrapped_json() {
1013 let resolver = create_test_resolver();
1014 let json_response = r#"{"data": [{"name": "Alice", "city": "NYC"}]}"#;
1015 let expected_fields = vec!["name".to_string(), "city".to_string()];
1016
1017 let result = resolver.parse_response_with_repair(json_response, &expected_fields);
1018 assert!(result.is_ok());
1019
1020 let extractions = result.unwrap();
1021 assert_eq!(extractions.len(), 2);
1022
1023 let classes: std::collections::HashSet<_> = extractions.iter()
1025 .map(|e| e.extraction_class.as_str()).collect();
1026 assert!(classes.contains("name"));
1027 assert!(classes.contains("city"));
1028
1029 let name_extraction = extractions.iter().find(|e| e.extraction_class == "name").unwrap();
1031 assert_eq!(name_extraction.extraction_text, "Alice");
1032 let city_extraction = extractions.iter().find(|e| e.extraction_class == "city").unwrap();
1033 assert_eq!(city_extraction.extraction_text, "NYC");
1034 }
1035
1036 #[test]
1037 fn test_parse_invalid_json() {
1038 let resolver = create_test_resolver();
1039 let invalid_response = r#"This is not JSON at all!"#;
1040 let expected_fields = vec!["name".to_string()];
1041
1042 let result = resolver.parse_response_with_repair(invalid_response, &expected_fields);
1043 assert!(result.is_err());
1044 }
1045
1046 #[test]
1047 fn test_validation_required_fields() {
1048 let resolver = create_test_resolver();
1049 let extractions = vec![
1050 Extraction::new("person".to_string(), "John".to_string()),
1051 ];
1052 let expected_fields = vec!["person".to_string(), "age".to_string()];
1053
1054 let result = resolver.validate_extractions(&extractions, &expected_fields);
1056 assert!(result.is_valid); let config = create_test_config();
1060 let validation_config = ValidationConfig {
1061 require_all_fields: true,
1062 save_raw_outputs: false,
1063 ..Default::default()
1064 };
1065 let resolver = Resolver::with_validation_config(&config, true, validation_config).unwrap();
1066 let result = resolver.validate_extractions(&extractions, &expected_fields);
1067 assert!(!result.is_valid); assert_eq!(result.errors.len(), 1);
1069 assert!(result.errors[0].message.contains("age"));
1070 }
1071
1072 #[test]
1073 fn test_validation_empty_extractions() {
1074 let resolver = create_test_resolver();
1075 let extractions = vec![
1076 Extraction::new("person".to_string(), "".to_string()), Extraction::new("age".to_string(), "25".to_string()),
1078 ];
1079 let expected_fields = vec!["person".to_string(), "age".to_string()];
1080
1081 let result = resolver.validate_extractions(&extractions, &expected_fields);
1082 assert!(result.is_valid); assert_eq!(result.warnings.len(), 1); assert!(result.warnings[0].message.contains("Empty extraction text"));
1085 }
1086
1087 #[test]
1088 fn test_validation_low_extraction_count() {
1089 let resolver = create_test_resolver();
1090 let extractions = vec![
1091 Extraction::new("person".to_string(), "John".to_string()),
1092 ];
1093 let expected_fields = vec![
1094 "person".to_string(),
1095 "age".to_string(),
1096 "city".to_string(),
1097 "email".to_string()
1098 ]; let result = resolver.validate_extractions(&extractions, &expected_fields);
1101 assert!(result.is_valid); assert!(!result.warnings.is_empty());
1103 assert!(result.warnings.iter().any(|w| w.message.contains("Low extraction count")));
1104 }
1105
/// Well-formed JSON parses into one extraction per key, validates cleanly,
/// and reports a raw-output file that exists on disk.
#[test]
fn test_validate_and_parse_success() {
    let temp_dir = TempDir::new().unwrap();
    let resolver = create_test_resolver_with_temp_dir(&temp_dir);

    let expected_fields = vec![String::from("person"), String::from("age")];
    let model_output = r#"{"person": "John Doe", "age": "30"}"#;

    let parsed = resolver.validate_and_parse(model_output, &expected_fields);
    assert!(parsed.is_ok());
    let (extractions, report) = parsed.unwrap();

    assert_eq!(extractions.len(), 2);
    assert!(report.is_valid);

    // The recorded path has to point at a file that really exists.
    assert!(report.raw_output_file.is_some());
    let raw_path = report.raw_output_file.unwrap();
    assert!(std::path::Path::new(&raw_path).exists());
}
1126
/// Input that is not JSON at all must come back from `validate_and_parse`
/// as an `Err`.
#[test]
fn test_validate_and_parse_parse_failure() {
    let temp_dir = TempDir::new().unwrap();
    let resolver = create_test_resolver_with_temp_dir(&temp_dir);

    let expected_fields = vec![String::from("person")];
    let not_json = "This is definitely not JSON!";

    let outcome = resolver.validate_and_parse(not_json, &expected_fields);
    assert!(outcome.is_err());
}
1139
/// `clean_response` must drop Markdown code fences (with or without a
/// language tag) while leaving the fenced payload and any surrounding text
/// untouched.
#[test]
fn test_clean_response_removes_code_fences() {
    let temp_dir = TempDir::new().unwrap();
    let resolver = create_test_resolver_with_temp_dir(&temp_dir);

    // Payload shared by every case that consists of nothing but a fenced object.
    let bare_object = r#"{"name": "John"}"#;

    // (input, expected output) pairs.
    let cases: [(&str, &str); 5] = [
        (r#"```json{"name": "John"}```"#, bare_object),
        (r#"```yaml{"name": "John"}```"#, bare_object),
        (r#"```{"name": "John"}```"#, bare_object),
        (r#"```python{"name": "John"}```"#, bare_object),
        (
            r#"Some text ```json{"name": "John"}``` more text"#,
            r#"Some text {"name": "John"} more text"#,
        ),
    ];

    for &(input, expected) in cases.iter() {
        let cleaned = resolver.clean_response(input);
        assert_eq!(cleaned, expected, "Failed to clean: {}", input);
    }
}
1159
/// When all expected fields are crammed into one string value, the repair
/// step must split them into separate keys; an object that already has one
/// key per field must be left alone (`None`).
#[test]
fn test_detect_and_repair_malformed_json() {
    let temp_dir = TempDir::new().unwrap();
    let resolver = create_test_resolver_with_temp_dir(&temp_dir);
    let expected_fields = vec![
        String::from("name"),
        String::from("age"),
        String::from("city"),
    ];

    // Every field is packed as "key: value" text under a single unrelated key.
    let collapsed: serde_json::Value = serde_json::json!({
        "person": "name: John Doe, age: 30, city: New York"
    });

    let maybe_repaired = resolver.detect_and_repair_malformed_json(&collapsed, &expected_fields);
    assert!(maybe_repaired.is_some(), "Should detect malformed JSON");
    let repaired = maybe_repaired.unwrap();

    let fields = match repaired.as_object() {
        Some(map) => map,
        None => panic!("Repaired JSON should be an object"),
    };

    assert!(fields.contains_key("name"), "Should extract name field");
    assert!(fields.contains_key("age"), "Should extract age field");
    assert!(fields.contains_key("city"), "Should extract city field");

    assert_eq!(fields["name"].as_str().unwrap(), "John Doe");
    assert_eq!(fields["age"].as_str().unwrap(), "30");
    assert_eq!(fields["city"].as_str().unwrap(), "New York");

    // Already one key per expected field: nothing to repair.
    let already_flat: serde_json::Value = serde_json::json!({
        "name": "John Doe",
        "age": "30",
        "city": "New York"
    });

    let untouched = resolver.detect_and_repair_malformed_json(&already_flat, &expected_fields);
    assert!(untouched.is_none(), "Well-formed JSON should not be repaired");
}
1198
/// A JSON object wrapped in a fenced `json` code block must parse into one
/// extraction per key with the original values.
#[test]
fn test_parse_response_with_code_fences() {
    let temp_dir = TempDir::new().unwrap();
    let resolver = create_test_resolver_with_temp_dir(&temp_dir);
    let expected_fields = vec![String::from("name"), String::from("age")];

    let fenced_response = r#"```json
{
 "name": "Alice",
 "age": "25"
}
```"#;

    let parsed = resolver.parse_response_with_repair(fenced_response, &expected_fields);
    assert!(parsed.is_ok(), "Should parse fenced JSON successfully");

    let extractions = parsed.unwrap();
    assert_eq!(extractions.len(), 2, "Should extract 2 fields");

    // Each class must occur exactly once and carry the text from the payload.
    for &(class, text) in [("name", "Alice"), ("age", "25")].iter() {
        let matches: Vec<_> = extractions
            .iter()
            .filter(|e| e.extraction_class == class)
            .collect();
        assert_eq!(matches.len(), 1);
        assert_eq!(matches[0].extraction_text, text);
    }
}
1227
/// A response whose fields are collapsed into one string value must be
/// repaired during parsing and yield one extraction per expected field.
#[test]
fn test_parse_response_with_malformed_repair() {
    let temp_dir = TempDir::new().unwrap();
    let resolver = create_test_resolver_with_temp_dir(&temp_dir);
    let expected_fields = vec![
        String::from("name"),
        String::from("age"),
        String::from("profession"),
    ];

    let malformed_response = r#"{
 "person": "name: Bob Smith, age: 35, profession: engineer"
}"#;

    let parsed = resolver.parse_response_with_repair(malformed_response, &expected_fields);
    assert!(parsed.is_ok(), "Should parse and repair malformed JSON successfully");

    let extractions = parsed.unwrap();
    assert_eq!(extractions.len(), 3, "Should extract 3 separate fields after repair");

    // True when some extraction has exactly this class and text.
    let has = |class: &str, text: &str| {
        extractions
            .iter()
            .any(|e| e.extraction_class == class && e.extraction_text == text)
    };

    assert!(has("name", "Bob Smith"), "Should find extracted name");
    assert!(has("age", "35"), "Should find extracted age");
    assert!(has("profession", "engineer"), "Should find extracted profession");
}
1253
1254 mod type_coercion_tests {
1256 use super::*;
1257
1258 fn create_coercion_resolver() -> Resolver {
1259 let config = create_test_config();
1260 let validation_config = ValidationConfig {
1261 enable_type_coercion: true,
1262 save_raw_outputs: false,
1263 ..Default::default()
1264 };
1265 Resolver::with_validation_config(&config, true, validation_config).unwrap()
1266 }
1267
1268 #[test]
1269 fn test_integer_coercion() {
1270 let resolver = create_coercion_resolver();
1271 let extractions = vec![
1272 Extraction::new("age".to_string(), "25".to_string()),
1273 Extraction::new("count".to_string(), "-10".to_string()),
1274 Extraction::new("year".to_string(), "2024".to_string()),
1275 ];
1276 let expected_fields = vec!["age".to_string(), "count".to_string(), "year".to_string()];
1277
1278 let result = resolver.validate_extractions(&extractions, &expected_fields);
1279 assert!(result.is_valid);
1280
1281 let coercion_summary = result.coercion_summary.unwrap();
1282 assert_eq!(coercion_summary.successful_coercions, 3);
1283 assert_eq!(coercion_summary.failed_coercions, 0);
1284
1285 let age_coercion = coercion_summary.coercion_details.iter()
1287 .find(|d| d.field_name == "age").unwrap();
1288 assert!(age_coercion.success);
1289 assert_eq!(age_coercion.target_type, CoercionTargetType::Integer);
1290 assert_eq!(age_coercion.coerced_value.as_ref().unwrap().as_i64().unwrap(), 25);
1291 }
1292
1293 #[test]
1294 fn test_float_coercion() {
1295 let resolver = create_coercion_resolver();
1296 let extractions = vec![
1297 Extraction::new("score".to_string(), "94.7".to_string()),
1298 Extraction::new("percentage".to_string(), "-12.5".to_string()),
1299 Extraction::new("scientific".to_string(), "1.23e-4".to_string()),
1300 ];
1301 let expected_fields = vec!["score".to_string(), "percentage".to_string(), "scientific".to_string()];
1302
1303 let result = resolver.validate_extractions(&extractions, &expected_fields);
1304 assert!(result.is_valid);
1305
1306 let coercion_summary = result.coercion_summary.unwrap();
1307 assert_eq!(coercion_summary.successful_coercions, 3);
1308
1309 let score_coercion = coercion_summary.coercion_details.iter()
1310 .find(|d| d.field_name == "score").unwrap();
1311 assert!(score_coercion.success);
1312 assert_eq!(score_coercion.target_type, CoercionTargetType::Float);
1313 assert!((score_coercion.coerced_value.as_ref().unwrap().as_f64().unwrap() - 94.7).abs() < 0.01);
1314 }
1315
1316 #[test]
1317 fn test_boolean_coercion() {
1318 let resolver = create_coercion_resolver();
1319 let extractions = vec![
1320 Extraction::new("active".to_string(), "true".to_string()),
1321 Extraction::new("enabled".to_string(), "yes".to_string()),
1322 Extraction::new("disabled".to_string(), "false".to_string()),
1323 Extraction::new("off".to_string(), "no".to_string()),
1324 Extraction::new("binary".to_string(), "1".to_string()),
1325 Extraction::new("zero".to_string(), "0".to_string()),
1326 ];
1327 let expected_fields = vec!["active".to_string(), "enabled".to_string(), "disabled".to_string(), "off".to_string(), "binary".to_string(), "zero".to_string()];
1328
1329 let result = resolver.validate_extractions(&extractions, &expected_fields);
1330 assert!(result.is_valid);
1331
1332 let coercion_summary = result.coercion_summary.unwrap();
1333 assert_eq!(coercion_summary.successful_coercions, 6);
1334
1335 let active_coercion = coercion_summary.coercion_details.iter()
1336 .find(|d| d.field_name == "active").unwrap();
1337 assert!(active_coercion.success);
1338 assert_eq!(active_coercion.target_type, CoercionTargetType::Boolean);
1339 assert_eq!(active_coercion.coerced_value.as_ref().unwrap().as_bool().unwrap(), true);
1340
1341 let binary_coercion = coercion_summary.coercion_details.iter()
1343 .find(|d| d.field_name == "binary").unwrap();
1344 assert!(binary_coercion.success);
1345 assert_eq!(binary_coercion.target_type, CoercionTargetType::Integer);
1346 assert_eq!(binary_coercion.coerced_value.as_ref().unwrap().as_i64().unwrap(), 1);
1347
1348 let zero_coercion = coercion_summary.coercion_details.iter()
1349 .find(|d| d.field_name == "zero").unwrap();
1350 assert!(zero_coercion.success);
1351 assert_eq!(zero_coercion.target_type, CoercionTargetType::Integer);
1352 assert_eq!(zero_coercion.coerced_value.as_ref().unwrap().as_i64().unwrap(), 0);
1353 }
1354
1355 #[test]
1356 fn test_currency_coercion() {
1357 let resolver = create_coercion_resolver();
1358 let extractions = vec![
1359 Extraction::new("funding".to_string(), "$1.5 million".to_string()),
1360 Extraction::new("budget".to_string(), "$2.3M".to_string()),
1361 Extraction::new("salary".to_string(), "$75,000".to_string()),
1362 Extraction::new("value".to_string(), "500K".to_string()),
1363 Extraction::new("debt".to_string(), "$1.2 billion".to_string()),
1364 ];
1365 let expected_fields = vec!["funding".to_string(), "budget".to_string(), "salary".to_string(), "value".to_string(), "debt".to_string()];
1366
1367 let result = resolver.validate_extractions(&extractions, &expected_fields);
1368 assert!(result.is_valid);
1369
1370 let coercion_summary = result.coercion_summary.unwrap();
1371 assert_eq!(coercion_summary.successful_coercions, 5);
1372
1373 let funding_coercion = coercion_summary.coercion_details.iter()
1374 .find(|d| d.field_name == "funding").unwrap();
1375 assert!(funding_coercion.success);
1376 assert_eq!(funding_coercion.target_type, CoercionTargetType::Currency);
1377 assert!((funding_coercion.coerced_value.as_ref().unwrap().as_f64().unwrap() - 1_500_000.0).abs() < 1.0);
1378 }
1379
1380 #[test]
1381 fn test_percentage_coercion() {
1382 let resolver = create_coercion_resolver();
1383 let extractions = vec![
1384 Extraction::new("accuracy".to_string(), "94.7%".to_string()),
1385 Extraction::new("completion".to_string(), "100%".to_string()),
1386 Extraction::new("error_rate".to_string(), "0.5%".to_string()),
1387 ];
1388 let expected_fields = vec!["accuracy".to_string(), "completion".to_string(), "error_rate".to_string()];
1389
1390 let result = resolver.validate_extractions(&extractions, &expected_fields);
1391 assert!(result.is_valid);
1392
1393 let coercion_summary = result.coercion_summary.unwrap();
1394 assert_eq!(coercion_summary.successful_coercions, 3);
1395
1396 let accuracy_coercion = coercion_summary.coercion_details.iter()
1397 .find(|d| d.field_name == "accuracy").unwrap();
1398 assert!(accuracy_coercion.success);
1399 assert_eq!(accuracy_coercion.target_type, CoercionTargetType::Percentage);
1400 assert!((accuracy_coercion.coerced_value.as_ref().unwrap().as_f64().unwrap() - 0.947).abs() < 0.001);
1401 }
1402
1403 #[test]
1404 fn test_email_coercion() {
1405 let resolver = create_coercion_resolver();
1406 let extractions = vec![
1407 Extraction::new("contact".to_string(), "john.doe@example.com".to_string()),
1408 Extraction::new("support".to_string(), "support@company.org".to_string()),
1409 Extraction::new("invalid".to_string(), "not-an-email".to_string()),
1410 ];
1411 let expected_fields = vec!["contact".to_string(), "support".to_string(), "invalid".to_string()];
1412
1413 let result = resolver.validate_extractions(&extractions, &expected_fields);
1414 assert!(result.is_valid);
1415
1416 let coercion_summary = result.coercion_summary.unwrap();
1417 assert_eq!(coercion_summary.successful_coercions, 2); assert_eq!(coercion_summary.failed_coercions, 1);
1419
1420 let contact_coercion = coercion_summary.coercion_details.iter()
1421 .find(|d| d.field_name == "contact").unwrap();
1422 assert!(contact_coercion.success);
1423 assert_eq!(contact_coercion.target_type, CoercionTargetType::Email);
1424 let coerced_obj = contact_coercion.coerced_value.as_ref().unwrap().as_object().unwrap();
1425 assert_eq!(coerced_obj.get("email").unwrap().as_str().unwrap(), "john.doe@example.com");
1426 }
1427
1428 #[test]
1429 fn test_phone_coercion() {
1430 let resolver = create_coercion_resolver();
1431 let extractions = vec![
1432 Extraction::new("phone1".to_string(), "(617) 555-1234".to_string()),
1433 Extraction::new("phone2".to_string(), "617-555-1234".to_string()),
1434 Extraction::new("phone3".to_string(), "617.555.1234".to_string()),
1435 Extraction::new("phone4".to_string(), "6175551234".to_string()),
1436 Extraction::new("invalid".to_string(), "123-45".to_string()),
1437 ];
1438 let expected_fields = vec!["phone1".to_string(), "phone2".to_string(), "phone3".to_string(), "phone4".to_string(), "invalid".to_string()];
1439
1440 let result = resolver.validate_extractions(&extractions, &expected_fields);
1441 assert!(result.is_valid);
1442
1443 let coercion_summary = result.coercion_summary.unwrap();
1444 assert_eq!(coercion_summary.successful_coercions, 4); assert_eq!(coercion_summary.failed_coercions, 1);
1446
1447 let phone1_coercion = coercion_summary.coercion_details.iter()
1448 .find(|d| d.field_name == "phone1").unwrap();
1449 assert!(phone1_coercion.success);
1450 assert_eq!(phone1_coercion.target_type, CoercionTargetType::PhoneNumber);
1451 let coerced_obj = phone1_coercion.coerced_value.as_ref().unwrap().as_object().unwrap();
1452 assert_eq!(coerced_obj.get("phone").unwrap().as_str().unwrap(), "(617) 555-1234");
1453 }
1454
1455 #[test]
1456 fn test_no_coercion_when_disabled() {
1457 let config = create_test_config();
1458 let validation_config = ValidationConfig {
1459 enable_type_coercion: false, save_raw_outputs: false,
1461 ..Default::default()
1462 };
1463 let resolver = Resolver::with_validation_config(&config, true, validation_config).unwrap();
1464
1465 let extractions = vec![
1466 Extraction::new("age".to_string(), "25".to_string()),
1467 ];
1468 let expected_fields = vec!["age".to_string()];
1469
1470 let result = resolver.validate_extractions(&extractions, &expected_fields);
1471 assert!(result.is_valid);
1472 assert!(result.coercion_summary.is_none()); }
1474
1475 #[test]
1476 fn test_mixed_coercion_results() {
1477 let resolver = create_coercion_resolver();
1478 let extractions = vec![
1479 Extraction::new("age".to_string(), "25".to_string()), Extraction::new("name".to_string(), "John Doe".to_string()), Extraction::new("email".to_string(), "john@example.com".to_string()), Extraction::new("invalid_number".to_string(), "abc123".to_string()), Extraction::new("percentage".to_string(), "95%".to_string()), ];
1485 let expected_fields = vec!["age".to_string(), "name".to_string(), "email".to_string(), "invalid_number".to_string(), "percentage".to_string()];
1486
1487 let result = resolver.validate_extractions(&extractions, &expected_fields);
1488 assert!(result.is_valid);
1489
1490 let coercion_summary = result.coercion_summary.unwrap();
1491 assert_eq!(coercion_summary.successful_coercions, 3); assert_eq!(coercion_summary.failed_coercions, 2); let successful_types: Vec<_> = coercion_summary.coercion_details.iter()
1496 .filter(|d| d.success)
1497 .map(|d| &d.target_type)
1498 .collect();
1499 assert!(successful_types.contains(&&CoercionTargetType::Integer));
1500 assert!(successful_types.contains(&&CoercionTargetType::Email));
1501 assert!(successful_types.contains(&&CoercionTargetType::Percentage));
1502 }
1503
1504 #[test]
1505 fn test_corrected_data_generation() {
1506 let resolver = create_coercion_resolver();
1507 let extractions = vec![
1508 Extraction::new("age".to_string(), "25".to_string()),
1509 Extraction::new("price".to_string(), "$19.99".to_string()),
1510 Extraction::new("active".to_string(), "true".to_string()),
1511 Extraction::new("invalid".to_string(), "not_a_number".to_string()),
1512 ];
1513 let expected_fields = vec!["age".to_string(), "price".to_string(), "active".to_string(), "invalid".to_string()];
1514
1515 let result = resolver.validate_extractions(&extractions, &expected_fields);
1516 assert!(result.is_valid);
1517
1518 let corrected_data = result.corrected_data.unwrap();
1520 let corrected_obj = corrected_data.as_object().unwrap();
1521
1522 assert_eq!(corrected_obj.get("age").unwrap().as_i64().unwrap(), 25);
1524 assert_eq!(corrected_obj.get("price").unwrap().as_f64().unwrap(), 19.99);
1525 assert_eq!(corrected_obj.get("active").unwrap().as_bool().unwrap(), true);
1526
1527 assert_eq!(corrected_obj.get("invalid").unwrap().as_str().unwrap(), "not_a_number");
1529 }
1530 }
1531}