1use converge_core::llm::{LlmProvider, LlmRequest};
42use regex::Regex;
43use std::path::Path;
44use std::sync::Arc;
45
46pub fn preprocess_truths(content: &str) -> String {
62 let re = Regex::new(r"(?m)^(\s*)Truth:").unwrap();
64 re.replace_all(content, "${1}Feature:").to_string()
65}
66
/// Switches and thresholds that control which checks the validator runs.
#[derive(Debug, Clone)]
pub struct ValidationConfig {
    /// Run the business-sense check on every scenario.
    pub check_business_sense: bool,
    /// Run the compilability check on every scenario.
    pub check_compilability: bool,
    /// Run the convention checks on every scenario.
    pub check_conventions: bool,
    /// Minimum confidence threshold (`0.7` by default).
    pub min_confidence: f64,
}

impl Default for ValidationConfig {
    /// Enables every check and sets `min_confidence` to `0.7`.
    fn default() -> Self {
        const ENABLED: bool = true;

        ValidationConfig {
            check_business_sense: ENABLED,
            check_compilability: ENABLED,
            check_conventions: ENABLED,
            min_confidence: 0.7,
        }
    }
}
90
91#[derive(Debug, Clone)]
93pub struct ValidationIssue {
94 pub location: String,
96 pub category: IssueCategory,
98 pub severity: Severity,
100 pub message: String,
102 pub suggestion: Option<String>,
104}
105
/// The kind of check that produced a [`ValidationIssue`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum IssueCategory {
    /// The scenario was judged not to make business sense (or to be ambiguous).
    BusinessSense,
    /// The scenario cannot be compiled to an invariant as written, or needs refactoring first.
    Compilability,
    /// The feature or scenario breaks a structural or wording convention.
    Convention,
    /// Syntax-related finding.
    Syntax,
    /// Finding that is unrelated to the specification itself.
    // NOTE(review): presumably meant for infrastructure failures such as LLM
    // call errors — confirm against callers.
    NotRelatedError,
}
120
/// How serious a [`ValidationIssue`] is.
///
/// Variants are declared from least to most severe, so the derived ordering
/// gives `Info < Warning < Error`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
pub enum Severity {
    /// Informational note.
    Info,
    /// Something that should be looked at.
    Warning,
    /// A problem that makes the specification invalid.
    Error,
}
131
132#[derive(Debug, Clone)]
134pub struct SpecValidation {
135 pub is_valid: bool,
137 pub file_path: String,
139 pub scenario_count: usize,
141 pub issues: Vec<ValidationIssue>,
143 pub confidence: f64,
145}
146
147impl SpecValidation {
148 #[must_use]
150 pub fn has_errors(&self) -> bool {
151 self.issues.iter().any(|i| i.severity == Severity::Error)
152 }
153
154 #[must_use]
156 pub fn has_warnings(&self) -> bool {
157 self.issues.iter().any(|i| i.severity == Severity::Warning)
158 }
159
160 #[must_use]
162 pub fn summary(&self) -> String {
163 let errors = self
164 .issues
165 .iter()
166 .filter(|i| i.severity == Severity::Error)
167 .count();
168 let warnings = self
169 .issues
170 .iter()
171 .filter(|i| i.severity == Severity::Warning)
172 .count();
173
174 if self.is_valid {
175 format!(
176 "✓ {} validated ({} scenarios, {} warnings)",
177 self.file_path, self.scenario_count, warnings
178 )
179 } else {
180 format!(
181 "✗ {} invalid ({} errors, {} warnings)",
182 self.file_path, errors, warnings
183 )
184 }
185 }
186}
187
188pub struct GherkinValidator {
190 provider: Arc<dyn LlmProvider>,
191 config: ValidationConfig,
192}
193
194impl GherkinValidator {
195 #[must_use]
197 pub fn new(provider: Arc<dyn LlmProvider>, config: ValidationConfig) -> Self {
198 Self { provider, config }
199 }
200
201 pub fn validate(
211 &self,
212 content: &str,
213 file_name: &str,
214 ) -> Result<SpecValidation, ValidationError> {
215 let processed = preprocess_truths(content);
217
218 let feature = gherkin::Feature::parse(&processed, gherkin::GherkinEnv::default())
221 .map_err(|e| ValidationError::ParseError(format!("{e}")))?;
222
223 let mut issues = Vec::new();
224 let scenario_count = feature.scenarios.len();
225
226 for scenario in &feature.scenarios {
228 let scenario_issues = self.validate_scenario(&feature, scenario)?;
229 issues.extend(scenario_issues);
230 }
231
232 let feature_issues = self.validate_feature(&feature)?;
234 issues.extend(feature_issues);
235
236 let has_errors = issues.iter().any(|i| i.severity == Severity::Error);
237 let confidence = if issues.is_empty() { 1.0 } else { 0.7 };
238
239 Ok(SpecValidation {
240 is_valid: !has_errors,
241 file_path: file_name.to_string(),
242 scenario_count,
243 issues,
244 confidence,
245 })
246 }
247
248 pub fn validate_file(&self, path: impl AsRef<Path>) -> Result<SpecValidation, ValidationError> {
254 let path = path.as_ref();
255 let content =
256 std::fs::read_to_string(path).map_err(|e| ValidationError::IoError(format!("{e}")))?;
257
258 let file_name = path
259 .file_name()
260 .and_then(|n| n.to_str())
261 .unwrap_or("unknown");
262
263 self.validate(&content, file_name)
264 }
265
266 fn validate_scenario(
273 &self,
274 feature: &gherkin::Feature,
275 scenario: &gherkin::Scenario,
276 ) -> Result<Vec<ValidationIssue>, ValidationError> {
277 let mut issues = Vec::new();
278
279 if self.config.check_business_sense {
281 match self.check_business_sense(feature, scenario) {
282 Ok(Some(issue)) => issues.push(issue),
283 Ok(None) => {} Err(e) => {
285 return Err(e);
287 }
288 }
289 }
290
291 if self.config.check_compilability {
293 match self.check_compilability(feature, scenario) {
294 Ok(Some(issue)) => issues.push(issue),
295 Ok(None) => {} Err(e) => {
297 return Err(e);
299 }
300 }
301 }
302
303 if self.config.check_conventions {
305 issues.extend(self.check_conventions(scenario));
306 }
307
308 Ok(issues)
309 }
310
311 fn validate_feature(
313 &self,
314 feature: &gherkin::Feature,
315 ) -> Result<Vec<ValidationIssue>, ValidationError> {
316 let mut issues = Vec::new();
317
318 if feature.description.is_none() {
320 issues.push(ValidationIssue {
321 location: "Feature".to_string(),
322 category: IssueCategory::Convention,
323 severity: Severity::Warning,
324 message: "Feature lacks a description".to_string(),
325 suggestion: Some("Add a description explaining the business purpose".to_string()),
326 });
327 }
328
329 if feature.scenarios.is_empty() {
331 issues.push(ValidationIssue {
332 location: "Feature".to_string(),
333 category: IssueCategory::Convention,
334 severity: Severity::Error,
335 message: "Feature has no scenarios".to_string(),
336 suggestion: Some("Add at least one scenario".to_string()),
337 });
338 }
339
340 Ok(issues)
341 }
342
343 fn check_business_sense(
345 &self,
346 feature: &gherkin::Feature,
347 scenario: &gherkin::Scenario,
348 ) -> Result<Option<ValidationIssue>, ValidationError> {
349 let prompt = format!(
350 r"You are a business analyst validating Gherkin specifications for a multi-agent AI system called Converge.
351
352Feature: {}
353Scenario: {}
354
355Steps:
356{}
357
358Evaluate if this scenario makes business sense:
3591. Is the precondition (Given) realistic and well-defined?
3602. Is the action (When) meaningful and testable?
3613. Is the expected outcome (Then) measurable and valuable?
362
363Respond with ONLY one of:
364- VALID: if the scenario makes business sense
365- INVALID: <reason> if it doesn't make sense
366- UNCLEAR: <question> if more context is needed",
367 feature.name,
368 scenario.name,
369 format_steps(&scenario.steps)
370 );
371
372 let system_prompt = "You are a strict business requirements validator. Be concise.";
373 let request = LlmRequest::new(prompt.clone())
374 .with_system(system_prompt)
375 .with_max_tokens(200)
376 .with_temperature(0.3);
377
378 eprintln!("\n📤 Business Sense Check - Sending to LLM:");
379 eprintln!(" Scenario: {}", scenario.name);
380 eprintln!(" System Prompt: {system_prompt}");
381 eprintln!(
382 " User Prompt (first 200 chars): {}...",
383 prompt.chars().take(200).collect::<String>()
384 );
385 eprintln!(" Request params: max_tokens=200, temperature=0.3");
386
387 let response = self.provider.complete(&request).map_err(|e| {
388 ValidationError::LlmError(format!("NOT_RELATED_ERROR: LLM API call failed: {e}"))
390 })?;
391
392 eprintln!("\n📥 Business Sense Check - Response from LLM:");
393 eprintln!(" Raw response: {}", response.content);
394 eprintln!(" Model: {}", response.model);
395 eprintln!(
396 " Token usage: prompt={}, completion={}, total={}",
397 response.usage.prompt_tokens,
398 response.usage.completion_tokens,
399 response.usage.total_tokens
400 );
401 eprintln!(" Finish reason: {:?}", response.finish_reason);
402
403 let content = response.content.trim();
404 eprintln!("\n🔍 Business Sense Check - Reasoning:");
405
406 if content.starts_with("INVALID:") {
407 let reason = content.strip_prefix("INVALID:").unwrap_or("").trim();
408 eprintln!(" → Detected: INVALID");
409 eprintln!(" → Reason: {reason}");
410 eprintln!(" → Action: Creating Error-level ValidationIssue");
411 Ok(Some(ValidationIssue {
412 location: format!("Scenario: {}", scenario.name),
413 category: IssueCategory::BusinessSense,
414 severity: Severity::Error,
415 message: reason.to_string(),
416 suggestion: None,
417 }))
418 } else if content.starts_with("UNCLEAR:") {
419 let question = content.strip_prefix("UNCLEAR:").unwrap_or("").trim();
420 eprintln!(" → Detected: UNCLEAR");
421 eprintln!(" → Question: {question}");
422 eprintln!(" → Action: Creating Warning-level ValidationIssue with suggestion");
423 Ok(Some(ValidationIssue {
424 location: format!("Scenario: {}", scenario.name),
425 category: IssueCategory::BusinessSense,
426 severity: Severity::Warning,
427 message: format!("Ambiguous: {question}"),
428 suggestion: Some("Clarify the scenario requirements".to_string()),
429 }))
430 } else {
431 eprintln!(" → Detected: VALID (or response doesn't match expected format)");
432 eprintln!(" → Action: No issue created (scenario passes business sense check)");
433 Ok(None) }
435 }
436
437 fn check_compilability(
439 &self,
440 feature: &gherkin::Feature,
441 scenario: &gherkin::Scenario,
442 ) -> Result<Option<ValidationIssue>, ValidationError> {
443 let prompt = format!(
444 r"You are a Rust developer checking if a Gherkin scenario can be compiled to a runtime invariant.
445
446In Converge, invariants are Rust structs implementing:
447```rust
448trait Invariant {{
449 fn name(&self) -> &str;
450 fn class(&self) -> InvariantClass; // Structural, Semantic, or Acceptance
451 fn check(&self, ctx: &Context) -> InvariantResult;
452}}
453```
454
455The Context has typed facts in categories: Seeds, Hypotheses, Strategies, Constraints, Signals, Competitors, Evaluations.
456
457Feature: {}
458Scenario: {}
459Steps:
460{}
461
462Can this scenario be implemented as a Converge Invariant?
463
464Respond with ONLY one of:
465- COMPILABLE: <invariant_class> - brief description of implementation
466- NOT_COMPILABLE: <reason why it cannot be a runtime check>
467- NEEDS_REFACTOR: <suggestion to make it compilable>",
468 feature.name,
469 scenario.name,
470 format_steps(&scenario.steps)
471 );
472
473 let system_prompt =
474 "You are a Rust expert. Be precise about what can be checked at runtime.";
475 let request = LlmRequest::new(prompt.clone())
476 .with_system(system_prompt)
477 .with_max_tokens(200)
478 .with_temperature(0.3);
479
480 eprintln!("\n📤 Compilability Check - Sending to LLM:");
481 eprintln!(" Scenario: {}", scenario.name);
482 eprintln!(" System Prompt: {system_prompt}");
483 eprintln!(
484 " User Prompt (first 200 chars): {}...",
485 prompt.chars().take(200).collect::<String>()
486 );
487 eprintln!(" Request params: max_tokens=200, temperature=0.3");
488
489 let response = self.provider.complete(&request).map_err(|e| {
490 ValidationError::LlmError(format!("NOT_RELATED_ERROR: LLM API call failed: {e}"))
492 })?;
493
494 eprintln!("\n📥 Compilability Check - Response from LLM:");
495 eprintln!(" Raw response: {}", response.content);
496 eprintln!(" Model: {}", response.model);
497 eprintln!(
498 " Token usage: prompt={}, completion={}, total={}",
499 response.usage.prompt_tokens,
500 response.usage.completion_tokens,
501 response.usage.total_tokens
502 );
503 eprintln!(" Finish reason: {:?}", response.finish_reason);
504
505 let content = response.content.trim();
506 eprintln!("\n🔍 Compilability Check - Reasoning:");
507
508 if content.starts_with("NOT_COMPILABLE:") {
509 let reason = content.strip_prefix("NOT_COMPILABLE:").unwrap_or("").trim();
510 eprintln!(" → Detected: NOT_COMPILABLE");
511 eprintln!(" → Reason: {reason}");
512 eprintln!(" → Action: Creating Error-level ValidationIssue");
513 Ok(Some(ValidationIssue {
514 location: format!("Scenario: {}", scenario.name),
515 category: IssueCategory::Compilability,
516 severity: Severity::Error,
517 message: format!("Cannot compile to invariant: {reason}"),
518 suggestion: None,
519 }))
520 } else if content.starts_with("NEEDS_REFACTOR:") {
521 let suggestion = content.strip_prefix("NEEDS_REFACTOR:").unwrap_or("").trim();
522 eprintln!(" → Detected: NEEDS_REFACTOR");
523 eprintln!(" → Suggestion: {suggestion}");
524 eprintln!(
525 " → Action: Creating Warning-level ValidationIssue with refactoring suggestion"
526 );
527 Ok(Some(ValidationIssue {
528 location: format!("Scenario: {}", scenario.name),
529 category: IssueCategory::Compilability,
530 severity: Severity::Warning,
531 message: "Scenario needs refactoring to be compilable".to_string(),
532 suggestion: Some(suggestion.to_string()),
533 }))
534 } else if content.starts_with("COMPILABLE:") {
535 let details = content.strip_prefix("COMPILABLE:").unwrap_or("").trim();
536 eprintln!(" → Detected: COMPILABLE");
537 eprintln!(" → Details: {details}");
538 eprintln!(" → Action: No issue created (scenario is compilable)");
539 Ok(None) } else {
541 eprintln!(" → Warning: Response doesn't match expected format");
542 eprintln!(" → Raw response: {content}");
543 eprintln!(" → Action: Treating as COMPILABLE (no issue created)");
544 Ok(None) }
546 }
547
548 fn check_conventions(&self, scenario: &gherkin::Scenario) -> Vec<ValidationIssue> {
550 let mut issues = Vec::new();
551
552 if scenario.name.is_empty() {
554 issues.push(ValidationIssue {
555 location: "Scenario".to_string(),
556 category: IssueCategory::Convention,
557 severity: Severity::Error,
558 message: "Scenario has no name".to_string(),
559 suggestion: Some("Add a descriptive name".to_string()),
560 });
561 }
562
563 let has_given = scenario
565 .steps
566 .iter()
567 .any(|s| matches!(s.ty, gherkin::StepType::Given));
568 let has_when = scenario
569 .steps
570 .iter()
571 .any(|s| matches!(s.ty, gherkin::StepType::When));
572 let has_then = scenario
573 .steps
574 .iter()
575 .any(|s| matches!(s.ty, gherkin::StepType::Then));
576
577 if !has_given && !has_when {
578 issues.push(ValidationIssue {
579 location: format!("Scenario: {}", scenario.name),
580 category: IssueCategory::Convention,
581 severity: Severity::Warning,
582 message: "Scenario lacks Given or When steps".to_string(),
583 suggestion: Some("Add preconditions (Given) or actions (When)".to_string()),
584 });
585 }
586
587 if !has_then {
588 issues.push(ValidationIssue {
589 location: format!("Scenario: {}", scenario.name),
590 category: IssueCategory::Convention,
591 severity: Severity::Error,
592 message: "Scenario lacks Then steps (expected outcomes)".to_string(),
593 suggestion: Some(
594 "Add at least one Then step defining the expected outcome".to_string(),
595 ),
596 });
597 }
598
599 for step in &scenario.steps {
601 if step.value.contains("should") && matches!(step.ty, gherkin::StepType::Then) {
602 } else if step.value.contains("must") || step.value.contains("always") {
604 } else if step.value.contains("might") || step.value.contains("maybe") {
606 issues.push(ValidationIssue {
607 location: format!("Step: {}", step.value),
608 category: IssueCategory::Convention,
609 severity: Severity::Warning,
610 message: "Uncertain language in step ('might', 'maybe')".to_string(),
611 suggestion: Some("Use definite language for testable assertions".to_string()),
612 });
613 }
614 }
615
616 issues
617 }
618}
619
620pub struct SpecGenerator {
622 provider: Arc<dyn LlmProvider>,
623}
624
625impl SpecGenerator {
626 #[must_use]
628 pub fn new(provider: Arc<dyn LlmProvider>) -> Self {
629 Self { provider }
630 }
631
632 pub fn generate_from_text(&self, text: &str) -> Result<String, ValidationError> {
638 let prompt = format!(
639 r"You are a requirements engineer for a multi-agent AI system called Converge.
640Convert the following free text into a valid Gherkin/Truth specification.
641
642Free Text:
643{}
644
645Rules for generation:
6461. Use Converge Truth syntax (`Truth:` instead of `Feature:`).
6472. Include a concise business description immediately after the Truth header.
6483. Ensure at least one scenario is generated.
6494. Each scenario must have Given/When/Then steps.
6505. Use definite language (avoid 'might', 'maybe').
6516. Focus on testable business outcomes.
652
653Return ONLY the Gherkin content, no explanation or preamble.
654
655Example Format:
656Truth: <name>
657 <description line 1>
658 <description line 2>
659
660 Scenario: <name>
661 Given <state>
662 When <action>
663 Then <outcome>",
664 text
665 );
666
667 let system_prompt =
668 "You are an expert Gherkin spec writer. Respond with ONLY the specification.";
669 let request = LlmRequest::new(prompt)
670 .with_system(system_prompt)
671 .with_max_tokens(1000)
672 .with_temperature(0.3);
673
674 let response = self
675 .provider
676 .complete(&request)
677 .map_err(|e| ValidationError::LlmError(format!("LLM API call failed: {e}")))?;
678
679 Ok(response.content.trim().to_string())
680 }
681}
682
683fn format_steps(steps: &[gherkin::Step]) -> String {
685 steps
686 .iter()
687 .map(|s| format!("{:?} {}", s.keyword, s.value))
688 .collect::<Vec<_>>()
689 .join("\n")
690}
691
/// Errors that can stop validation or generation before a result is produced.
///
/// Each variant carries a human-readable description of the failure.
#[derive(Debug, Clone)]
pub enum ValidationError {
    /// The specification text could not be parsed.
    ParseError(String),
    /// The specification file could not be read.
    IoError(String),
    /// The call to the LLM provider failed.
    LlmError(String),
}

impl std::fmt::Display for ValidationError {
    /// Writes the variant's label followed by its message, e.g. `Parse error: <msg>`.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let (label, detail) = match self {
            ValidationError::ParseError(detail) => ("Parse error", detail),
            ValidationError::IoError(detail) => ("IO error", detail),
            ValidationError::LlmError(detail) => ("LLM error", detail),
        };
        write!(f, "{label}: {detail}")
    }
}

impl std::error::Error for ValidationError {}
714
#[cfg(test)]
mod tests {
    use super::*;
    use converge_core::llm::{MockProvider, MockResponse};

    /// Mock provider primed with a `VALID` reply followed by a `COMPILABLE` reply.
    fn mock_valid_provider() -> Arc<dyn LlmProvider> {
        let replies = vec![
            MockResponse::success("VALID", 0.9),
            MockResponse::success("COMPILABLE: Acceptance - check strategy count", 0.9),
        ];
        Arc::new(MockProvider::new(replies))
    }

    /// Configuration that runs only the local convention checks.
    fn conventions_only() -> ValidationConfig {
        ValidationConfig {
            check_business_sense: false,
            check_compilability: false,
            check_conventions: true,
            min_confidence: 0.7,
        }
    }

    /// Validator backed by the all-passing mock provider.
    fn validator_with(config: ValidationConfig) -> GherkinValidator {
        GherkinValidator::new(mock_valid_provider(), config)
    }

    #[test]
    fn preprocess_converts_truth_to_feature() {
        let rewritten = preprocess_truths("Truth: Get paid for delivered work\n Scenario: Invoice");
        assert!(rewritten.starts_with("Feature:"));
        assert!(rewritten.contains("Scenario: Invoice"));
    }

    #[test]
    fn preprocess_preserves_feature_keyword() {
        let input = "Feature: Standard Gherkin\n Scenario: Test";
        assert_eq!(input, preprocess_truths(input));
    }

    #[test]
    fn validation_config_default() {
        let ValidationConfig {
            check_business_sense,
            check_compilability,
            check_conventions,
            min_confidence,
        } = ValidationConfig::default();

        assert!(check_conventions);
        assert!(check_business_sense);
        assert!(check_compilability);
        assert_eq!(min_confidence, 0.7);
    }

    #[test]
    fn validation_config_custom() {
        let custom = ValidationConfig {
            check_business_sense: false,
            min_confidence: 0.9,
            ..ValidationConfig::default()
        };

        assert!(!custom.check_business_sense);
        assert_eq!(custom.min_confidence, 0.9);
        assert!(custom.check_conventions);
    }

    #[test]
    fn validates_truth_syntax() {
        let spec = r"
Truth: Get paid for delivered work
 Scenario: Invoice and collect
 Given work is marked as delivered
 When the system converges
 Then invoice is issued
";

        let outcome = validator_with(ValidationConfig::default())
            .validate(spec, "money.truth")
            .unwrap();

        assert_eq!(outcome.scenario_count, 1);
    }

    #[test]
    fn validates_simple_feature() {
        let spec = r"
Feature: Growth Strategy Validation
 Scenario: Multiple strategies required
 When the system converges
 Then at least two distinct growth strategies exist
";

        let outcome = validator_with(ValidationConfig::default())
            .validate(spec, "test.feature")
            .unwrap();

        assert_eq!(outcome.scenario_count, 1);
    }

    #[test]
    fn detects_missing_then() {
        let spec = r"
Feature: Bad Spec
 Scenario: No assertions
 Given some precondition
 When something happens
";

        let outcome = validator_with(conventions_only())
            .validate(spec, "bad.feature")
            .unwrap();

        assert!(outcome.has_errors());
        let mentions_then = outcome.issues.iter().any(|issue| {
            issue.category == IssueCategory::Convention && issue.message.contains("Then")
        });
        assert!(mentions_then);
    }

    #[test]
    fn detects_uncertain_language() {
        let spec = r"
Feature: Uncertain Spec
 Scenario: Maybe works
 When something happens
 Then it might succeed
";

        let outcome = validator_with(conventions_only())
            .validate(spec, "uncertain.feature")
            .unwrap();

        assert!(outcome.has_warnings());
        let mentions_might = outcome
            .issues
            .iter()
            .any(|issue| issue.message.contains("might"));
        assert!(mentions_might);
    }

    #[test]
    fn handles_llm_invalid_response() {
        let provider = Arc::new(MockProvider::new(vec![
            MockResponse::success("INVALID: The scenario describes an untestable state", 0.8),
            MockResponse::success("COMPILABLE: Acceptance", 0.9),
        ]));

        let spec = r"
Feature: Test
 Scenario: Bad business logic
 When magic happens
 Then everything is perfect forever
";

        let validator = GherkinValidator::new(provider, ValidationConfig::default());
        let outcome = validator.validate(spec, "test.feature").unwrap();

        let has_business_error = outcome.issues.iter().any(|issue| {
            issue.category == IssueCategory::BusinessSense && issue.severity == Severity::Error
        });
        assert!(has_business_error);
    }

    #[test]
    fn generates_spec_from_text() {
        let mock_spec = "Truth: Test\n Scenario: Test\n Given X\n Then Y";
        let provider = Arc::new(MockProvider::new(vec![MockResponse::success(
            mock_spec, 0.9,
        )]));

        let generated = SpecGenerator::new(provider)
            .generate_from_text("Make a test spec")
            .unwrap();

        assert_eq!(generated, mock_spec);
    }

    mod property_tests {
        use super::*;
        use proptest::prelude::*;

        proptest! {
            /// Arbitrary printable input must never make preprocessing panic.
            #[test]
            fn preprocess_never_crashes(s in "\\PC*") {
                let _ = preprocess_truths(&s);
            }

            /// Input containing `Truth:` anywhere must be processed without panicking.
            #[test]
            fn truth_to_feature_conversion(s in ".*Truth:.*") {
                let _output = preprocess_truths(&s);
            }

            /// Input that mentions `Feature:` but never `Truth:` must come back unchanged.
            #[test]
            fn idempotency_of_feature(s in ".*Feature:.*") {
                if !s.contains("Truth:") {
                    let output = preprocess_truths(&s);
                    assert_eq!(s, output);
                }
            }
        }
    }
}