converge_tool/
gherkin.rs

1// Copyright 2024-2025 Aprio One AB, Sweden
2// Author: Kenneth Pernyer, kenneth@aprio.one
3// SPDX-License-Identifier: MIT
4// See LICENSE file in the project root for full license information.
5
6//! Gherkin spec validation for Converge.
7//!
8//! This module provides LLM-powered validation of Gherkin specifications
9//! to ensure they:
10//!
11//! 1. Make business sense (semantic validity)
12//! 2. Can be compiled to Rust invariants (technical feasibility)
13//! 3. Follow Converge conventions (style compliance)
14//!
15//! # Converge Truths
16//!
17//! Converge uses "Truth" as a branded alias for "Feature" in Gherkin specs.
18//! Both keywords are valid:
19//!
20//! ```gherkin
21//! Truth: Get paid for delivered work    # Converge branded syntax
22//! Feature: Get paid for delivered work  # Standard Gherkin syntax
23//! ```
24//!
25//! The preprocessor automatically converts `Truth:` to `Feature:` before parsing.
26//!
27//! # File Extensions
28//!
29//! Converge supports both `.truth` (preferred) and `.feature` file extensions.
30//!
31//! # Architecture
32//!
33//! ```text
34//! .truth file → Preprocessor → Parser → Scenarios → LLM Validator → Report
35//!               (Truth→Feature)              │
36//!                                            ├── Business sense check
37//!                                            ├── Compilability check
38//!                                            └── Convention check
39//! ```
40
41use converge_core::llm::{LlmProvider, LlmRequest};
42use regex::Regex;
43use std::path::Path;
44use std::sync::Arc;
45
46/// Preprocesses Converge Truth syntax to standard Gherkin.
47///
48/// Converts `Truth:` keyword to `Feature:` for parser compatibility.
49/// This allows Converge specs to use the branded "Truth" terminology
50/// while maintaining compatibility with standard Gherkin parsers.
51///
52/// # Examples
53///
54/// ```
55/// use converge_tool::gherkin::preprocess_truths;
56///
57/// let input = "Truth: Get paid for delivered work\n  Scenario: Invoice";
58/// let output = preprocess_truths(input);
59/// assert!(output.starts_with("Feature:"));
60/// ```
61pub fn preprocess_truths(content: &str) -> String {
62    // Match "Truth:" at the start of a line (with optional leading whitespace)
63    let re = Regex::new(r"(?m)^(\s*)Truth:").unwrap();
64    re.replace_all(content, "${1}Feature:").to_string()
65}
66
/// Settings that decide which checks a Gherkin validation run performs.
///
/// The three `check_*` switches each enable one family of checks per
/// scenario; `min_confidence` carries a numeric threshold.
#[derive(Clone, Debug)]
pub struct ValidationConfig {
    /// Run the LLM-backed "does this make business sense" check.
    pub check_business_sense: bool,
    /// Run the LLM-backed "can this become a Rust invariant" check.
    pub check_compilability: bool,
    /// Run the local (non-LLM) convention checks.
    pub check_conventions: bool,
    /// Lowest confidence an LLM assessment should have to be accepted.
    // NOTE(review): no code visible in this part of the file reads this
    // field — confirm where the threshold is applied.
    pub min_confidence: f64,
}
79
80impl Default for ValidationConfig {
81    fn default() -> Self {
82        Self {
83            check_business_sense: true,
84            check_compilability: true,
85            check_conventions: true,
86            min_confidence: 0.7,
87        }
88    }
89}
90
91/// Issue found during validation.
92#[derive(Debug, Clone)]
93pub struct ValidationIssue {
94    /// The scenario or step that has the issue.
95    pub location: String,
96    /// Category of the issue.
97    pub category: IssueCategory,
98    /// Severity level.
99    pub severity: Severity,
100    /// Human-readable description.
101    pub message: String,
102    /// Suggested fix (if available).
103    pub suggestion: Option<String>,
104}
105
/// The kind of check that produced a [`ValidationIssue`].
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum IssueCategory {
    /// The specification is not sensible from a business point of view.
    BusinessSense,
    /// The specification cannot be turned into a Rust invariant.
    Compilability,
    /// The specification breaks a Converge convention.
    Convention,
    /// The Gherkin text itself is syntactically wrong.
    Syntax,
    /// A failure unrelated to the specification, such as an LLM API or
    /// network error.
    NotRelatedError,
}
120
/// How serious a [`ValidationIssue`] is.
///
/// Variants are declared from least to most severe, so the derived ordering
/// gives `Info < Warning < Error`.
#[derive(Clone, Copy, Debug, Eq, Ord, PartialEq, PartialOrd)]
pub enum Severity {
    /// A suggestion; nothing is wrong.
    Info,
    /// Something that may cause problems.
    Warning,
    /// Something that has to be fixed.
    Error,
}
131
132// ============================================================================
133// Scenario Tag Extraction
134// ============================================================================
135
136/// Metadata extracted from Gherkin scenario tags.
137///
138/// Converge uses structured tags on scenarios to declare intent:
139///
140/// ```gherkin
141/// @invariant @structural @id:brand_safety
142/// Scenario: Strategies must not contain brand-unsafe terms
143/// ```
144///
145/// This struct captures the parsed tag structure for downstream
146/// compilation (codegen, WASM manifest generation, etc.).
147#[derive(Debug, Clone, PartialEq, Eq)]
148pub struct ScenarioMeta {
149    /// The scenario name from Gherkin.
150    pub name: String,
151    /// Parsed scenario kind from tags.
152    pub kind: Option<ScenarioKind>,
153    /// For invariant scenarios: the invariant class.
154    pub invariant_class: Option<InvariantClassTag>,
155    /// Unique identifier from `@id:<value>` tag.
156    pub id: Option<String>,
157    /// Provider type (e.g., "llm") from `@llm` tag.
158    pub provider: Option<String>,
159    /// Whether this is a test-only scenario (`@test` tag).
160    pub is_test: bool,
161    /// Raw tags as parsed (for extensibility).
162    pub raw_tags: Vec<String>,
163}
164
/// The role a scenario plays, as declared by its kind tag.
#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
pub enum ScenarioKind {
    /// Runtime invariant (compiles to an `Invariant` impl); tag `@invariant`.
    Invariant,
    /// Rule for validating proposals; tag `@validation`.
    Validation,
    /// Agent contract describing what the agent may propose; tag `@agent`.
    Agent,
    /// End-to-end integration test; tag `@e2e`.
    EndToEnd,
}
177
/// Invariant class declared by a scenario's class tag.
///
/// Corresponds to `converge_core::InvariantClass`.
#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
pub enum InvariantClassTag {
    /// Evaluated after every merge; a violation fails immediately.
    Structural,
    /// Evaluated once per cycle; a violation blocks convergence.
    Semantic,
    /// Evaluated at convergence; a violation rejects the results.
    Acceptance,
}
190
191/// Extract structured metadata from a parsed Gherkin scenario's tags.
192///
193/// Recognizes Converge tag conventions:
194/// - Kind: `@invariant`, `@validation`, `@agent`, `@e2e`
195/// - Class: `@structural`, `@semantic`, `@acceptance`
196/// - Provider: `@llm`
197/// - Test flag: `@test`
198/// - Identity: `@id:<identifier>`
199///
200/// # Examples
201///
202/// ```
203/// # fn main() {
204/// use converge_tool::gherkin::{extract_scenario_meta, ScenarioKind, InvariantClassTag};
205///
206/// // Simulate what gherkin crate produces for:
207/// //   @invariant @structural @id:brand_safety
208/// //   Scenario: Strategies must not contain brand-unsafe terms
209/// let tags = vec!["invariant".to_string(), "structural".to_string(), "id:brand_safety".to_string()];
210/// let meta = extract_scenario_meta("Strategies must not contain brand-unsafe terms", &tags);
211///
212/// assert_eq!(meta.kind, Some(ScenarioKind::Invariant));
213/// assert_eq!(meta.invariant_class, Some(InvariantClassTag::Structural));
214/// assert_eq!(meta.id.as_deref(), Some("brand_safety"));
215/// # }
216/// ```
217pub fn extract_scenario_meta(name: &str, tags: &[String]) -> ScenarioMeta {
218    let mut kind = None;
219    let mut invariant_class = None;
220    let mut id = None;
221    let mut provider = None;
222    let mut is_test = false;
223
224    for raw_tag in tags {
225        // Strip @ prefix if present (gherkin crate may or may not include it)
226        let tag = raw_tag.strip_prefix('@').unwrap_or(raw_tag);
227
228        match tag {
229            "invariant" => kind = Some(ScenarioKind::Invariant),
230            "validation" => kind = Some(ScenarioKind::Validation),
231            "agent" => kind = Some(ScenarioKind::Agent),
232            "e2e" => kind = Some(ScenarioKind::EndToEnd),
233            "structural" => invariant_class = Some(InvariantClassTag::Structural),
234            "semantic" => invariant_class = Some(InvariantClassTag::Semantic),
235            "acceptance" => invariant_class = Some(InvariantClassTag::Acceptance),
236            "llm" => provider = Some("llm".to_string()),
237            "test" => is_test = true,
238            t if t.starts_with("id:") => {
239                id = Some(t.strip_prefix("id:").unwrap_or("").to_string());
240            }
241            _ => {} // Unknown tags are preserved in raw_tags
242        }
243    }
244
245    ScenarioMeta {
246        name: name.to_string(),
247        kind,
248        invariant_class,
249        id,
250        provider,
251        is_test,
252        raw_tags: tags.to_vec(),
253    }
254}
255
256/// Extract metadata from all scenarios in a Gherkin/Truth string.
257///
258/// Parses the content (handling `Truth:` → `Feature:` conversion) and
259/// returns a `ScenarioMeta` for each scenario found.
260///
261/// # Errors
262///
263/// Returns `ValidationError::ParseError` if the Gherkin cannot be parsed.
264///
265/// # Examples
266///
267/// ```
268/// use converge_tool::gherkin::{extract_all_metas, ScenarioKind};
269///
270/// let content = r#"
271/// Truth: Growth Strategy Pack
272///
273///   @invariant @structural @id:brand_safety
274///   Scenario: Strategies must not contain brand-unsafe terms
275///     Given any fact under key "Strategies"
276///     Then it must not contain forbidden terms
277///
278///   @agent @llm @id:market_signal
279///   Scenario: Market Signal agent proposes Signals
280///     Given the Context contains facts under key "Seeds"
281///     When agent "market_signal" executes
282///     Then it proposes facts under key "Signals"
283/// "#;
284///
285/// let metas = extract_all_metas(content).unwrap();
286/// assert_eq!(metas.len(), 2);
287/// assert_eq!(metas[0].kind, Some(ScenarioKind::Invariant));
288/// assert_eq!(metas[1].kind, Some(ScenarioKind::Agent));
289/// ```
290pub fn extract_all_metas(content: &str) -> Result<Vec<ScenarioMeta>, ValidationError> {
291    let processed = preprocess_truths(content);
292    let feature = gherkin::Feature::parse(&processed, gherkin::GherkinEnv::default())
293        .map_err(|e| ValidationError::ParseError(format!("{e}")))?;
294
295    Ok(feature
296        .scenarios
297        .iter()
298        .map(|s| extract_scenario_meta(&s.name, &s.tags))
299        .collect())
300}
301
302/// Result of validating a Gherkin specification.
303#[derive(Debug, Clone)]
304pub struct SpecValidation {
305    /// Whether the spec is valid overall.
306    pub is_valid: bool,
307    /// Path to the validated file.
308    pub file_path: String,
309    /// Number of scenarios validated.
310    pub scenario_count: usize,
311    /// Issues found during validation.
312    pub issues: Vec<ValidationIssue>,
313    /// Overall confidence score (0.0 - 1.0).
314    pub confidence: f64,
315    /// Parsed metadata for each scenario.
316    pub scenario_metas: Vec<ScenarioMeta>,
317}
318
319impl SpecValidation {
320    /// Returns true if there are any errors.
321    #[must_use]
322    pub fn has_errors(&self) -> bool {
323        self.issues.iter().any(|i| i.severity == Severity::Error)
324    }
325
326    /// Returns true if there are any warnings.
327    #[must_use]
328    pub fn has_warnings(&self) -> bool {
329        self.issues.iter().any(|i| i.severity == Severity::Warning)
330    }
331
332    /// Returns a summary string.
333    #[must_use]
334    pub fn summary(&self) -> String {
335        let errors = self
336            .issues
337            .iter()
338            .filter(|i| i.severity == Severity::Error)
339            .count();
340        let warnings = self
341            .issues
342            .iter()
343            .filter(|i| i.severity == Severity::Warning)
344            .count();
345
346        if self.is_valid {
347            format!(
348                "✓ {} validated ({} scenarios, {} warnings)",
349                self.file_path, self.scenario_count, warnings
350            )
351        } else {
352            format!(
353                "✗ {} invalid ({} errors, {} warnings)",
354                self.file_path, errors, warnings
355            )
356        }
357    }
358}
359
360/// LLM-powered Gherkin specification validator.
361pub struct GherkinValidator {
362    provider: Arc<dyn LlmProvider>,
363    config: ValidationConfig,
364}
365
366impl GherkinValidator {
367    /// Creates a new validator with the given LLM provider.
368    #[must_use]
369    pub fn new(provider: Arc<dyn LlmProvider>, config: ValidationConfig) -> Self {
370        Self { provider, config }
371    }
372
373    /// Validates a Gherkin specification from a string.
374    ///
375    /// Supports both standard Gherkin (`Feature:`) and Converge Truth (`Truth:`) syntax.
376    ///
377    /// # Errors
378    ///
379    /// Returns error if the specification cannot be parsed or validated.
380    /// LLM API errors are wrapped as `ValidationError::LlmError` with "`NOT_RELATED_ERROR`:" prefix
381    /// to distinguish them from Gherkin validation issues.
382    pub fn validate(
383        &self,
384        content: &str,
385        file_name: &str,
386    ) -> Result<SpecValidation, ValidationError> {
387        // Preprocess: convert Truth: to Feature: for parser compatibility
388        let processed = preprocess_truths(content);
389
390        // Parse the Gherkin content
391        // Syntax errors are Gherkin validation issues
392        let feature = gherkin::Feature::parse(&processed, gherkin::GherkinEnv::default())
393            .map_err(|e| ValidationError::ParseError(format!("{e}")))?;
394
395        let mut issues = Vec::new();
396        let scenario_count = feature.scenarios.len();
397
398        // Validate each scenario
399        for scenario in &feature.scenarios {
400            let scenario_issues = self.validate_scenario(&feature, scenario)?;
401            issues.extend(scenario_issues);
402        }
403
404        // Check overall feature structure
405        let feature_issues = self.validate_feature(&feature)?;
406        issues.extend(feature_issues);
407
408        // Extract structured metadata from scenario tags
409        let scenario_metas: Vec<ScenarioMeta> = feature
410            .scenarios
411            .iter()
412            .map(|s| extract_scenario_meta(&s.name, &s.tags))
413            .collect();
414
415        let has_errors = issues.iter().any(|i| i.severity == Severity::Error);
416        let confidence = if issues.is_empty() { 1.0 } else { 0.7 };
417
418        Ok(SpecValidation {
419            is_valid: !has_errors,
420            file_path: file_name.to_string(),
421            scenario_count,
422            issues,
423            confidence,
424            scenario_metas,
425        })
426    }
427
428    /// Validates a Gherkin specification from a file.
429    ///
430    /// # Errors
431    ///
432    /// Returns error if the file cannot be read or validated.
433    pub fn validate_file(&self, path: impl AsRef<Path>) -> Result<SpecValidation, ValidationError> {
434        let path = path.as_ref();
435        let content =
436            std::fs::read_to_string(path).map_err(|e| ValidationError::IoError(format!("{e}")))?;
437
438        let file_name = path
439            .file_name()
440            .and_then(|n| n.to_str())
441            .unwrap_or("unknown");
442
443        self.validate(&content, file_name)
444    }
445
446    /// Validates a single scenario.
447    ///
448    /// # Errors
449    ///
450    /// Returns `ValidationError` if LLM API calls fail (wrapped as `NOT_RELATED_ERROR`).
451    /// Gherkin validation issues are returned as `ValidationIssue` items, not errors.
452    fn validate_scenario(
453        &self,
454        feature: &gherkin::Feature,
455        scenario: &gherkin::Scenario,
456    ) -> Result<Vec<ValidationIssue>, ValidationError> {
457        let mut issues = Vec::new();
458
459        // Check business sense if enabled
460        if self.config.check_business_sense {
461            match self.check_business_sense(feature, scenario) {
462                Ok(Some(issue)) => issues.push(issue),
463                Ok(None) => {} // No issue found
464                Err(e) => {
465                    // LLM errors are not Gherkin validation issues - propagate as error
466                    return Err(e);
467                }
468            }
469        }
470
471        // Check compilability if enabled
472        if self.config.check_compilability {
473            match self.check_compilability(feature, scenario) {
474                Ok(Some(issue)) => issues.push(issue),
475                Ok(None) => {} // No issue found
476                Err(e) => {
477                    // LLM errors are not Gherkin validation issues - propagate as error
478                    return Err(e);
479                }
480            }
481        }
482
483        // Check conventions if enabled (no LLM, so no errors possible)
484        if self.config.check_conventions {
485            issues.extend(self.check_conventions(scenario));
486        }
487
488        Ok(issues)
489    }
490
491    /// Validates the overall feature structure.
492    fn validate_feature(
493        &self,
494        feature: &gherkin::Feature,
495    ) -> Result<Vec<ValidationIssue>, ValidationError> {
496        let mut issues = Vec::new();
497
498        // Check that the feature has a description
499        if feature.description.is_none() {
500            issues.push(ValidationIssue {
501                location: "Feature".to_string(),
502                category: IssueCategory::Convention,
503                severity: Severity::Warning,
504                message: "Feature lacks a description".to_string(),
505                suggestion: Some("Add a description explaining the business purpose".to_string()),
506            });
507        }
508
509        // Check for empty feature
510        if feature.scenarios.is_empty() {
511            issues.push(ValidationIssue {
512                location: "Feature".to_string(),
513                category: IssueCategory::Convention,
514                severity: Severity::Error,
515                message: "Feature has no scenarios".to_string(),
516                suggestion: Some("Add at least one scenario".to_string()),
517            });
518        }
519
520        Ok(issues)
521    }
522
523    /// Uses LLM to check if a scenario makes business sense.
524    fn check_business_sense(
525        &self,
526        feature: &gherkin::Feature,
527        scenario: &gherkin::Scenario,
528    ) -> Result<Option<ValidationIssue>, ValidationError> {
529        let prompt = format!(
530            r"You are a business analyst validating Gherkin specifications for a multi-agent AI system called Converge.
531
532Feature: {}
533Scenario: {}
534
535Steps:
536{}
537
538Evaluate if this scenario makes business sense:
5391. Is the precondition (Given) realistic and well-defined?
5402. Is the action (When) meaningful and testable?
5413. Is the expected outcome (Then) measurable and valuable?
542
543Respond with ONLY one of:
544- VALID: if the scenario makes business sense
545- INVALID: <reason> if it doesn't make sense
546- UNCLEAR: <question> if more context is needed",
547            feature.name,
548            scenario.name,
549            format_steps(&scenario.steps)
550        );
551
552        let system_prompt = "You are a strict business requirements validator. Be concise.";
553        let request = LlmRequest::new(prompt.clone())
554            .with_system(system_prompt)
555            .with_max_tokens(200)
556            .with_temperature(0.3);
557
558        eprintln!("\n📤 Business Sense Check - Sending to LLM:");
559        eprintln!("   Scenario: {}", scenario.name);
560        eprintln!("   System Prompt: {system_prompt}");
561        eprintln!(
562            "   User Prompt (first 200 chars): {}...",
563            prompt.chars().take(200).collect::<String>()
564        );
565        eprintln!("   Request params: max_tokens=200, temperature=0.3");
566
567        let response = self.provider.complete(&request).map_err(|e| {
568            // LLM API errors are not Gherkin validation issues
569            ValidationError::LlmError(format!("NOT_RELATED_ERROR: LLM API call failed: {e}"))
570        })?;
571
572        eprintln!("\n📥 Business Sense Check - Response from LLM:");
573        eprintln!("   Raw response: {}", response.content);
574        eprintln!("   Model: {}", response.model);
575        eprintln!(
576            "   Token usage: prompt={}, completion={}, total={}",
577            response.usage.prompt_tokens,
578            response.usage.completion_tokens,
579            response.usage.total_tokens
580        );
581        eprintln!("   Finish reason: {:?}", response.finish_reason);
582
583        let content = response.content.trim();
584        eprintln!("\n🔍 Business Sense Check - Reasoning:");
585
586        if content.starts_with("INVALID:") {
587            let reason = content.strip_prefix("INVALID:").unwrap_or("").trim();
588            eprintln!("   → Detected: INVALID");
589            eprintln!("   → Reason: {reason}");
590            eprintln!("   → Action: Creating Error-level ValidationIssue");
591            Ok(Some(ValidationIssue {
592                location: format!("Scenario: {}", scenario.name),
593                category: IssueCategory::BusinessSense,
594                severity: Severity::Error,
595                message: reason.to_string(),
596                suggestion: None,
597            }))
598        } else if content.starts_with("UNCLEAR:") {
599            let question = content.strip_prefix("UNCLEAR:").unwrap_or("").trim();
600            eprintln!("   → Detected: UNCLEAR");
601            eprintln!("   → Question: {question}");
602            eprintln!("   → Action: Creating Warning-level ValidationIssue with suggestion");
603            Ok(Some(ValidationIssue {
604                location: format!("Scenario: {}", scenario.name),
605                category: IssueCategory::BusinessSense,
606                severity: Severity::Warning,
607                message: format!("Ambiguous: {question}"),
608                suggestion: Some("Clarify the scenario requirements".to_string()),
609            }))
610        } else {
611            eprintln!("   → Detected: VALID (or response doesn't match expected format)");
612            eprintln!("   → Action: No issue created (scenario passes business sense check)");
613            Ok(None) // VALID
614        }
615    }
616
617    /// Uses LLM to check if a scenario can be compiled to a Rust invariant.
618    fn check_compilability(
619        &self,
620        feature: &gherkin::Feature,
621        scenario: &gherkin::Scenario,
622    ) -> Result<Option<ValidationIssue>, ValidationError> {
623        let prompt = format!(
624            r"You are a Rust developer checking if a Gherkin scenario can be compiled to a runtime invariant.
625
626In Converge, invariants are Rust structs implementing:
627```rust
628trait Invariant {{
629    fn name(&self) -> &str;
630    fn class(&self) -> InvariantClass; // Structural, Semantic, or Acceptance
631    fn check(&self, ctx: &Context) -> InvariantResult;
632}}
633```
634
635The Context has typed facts in categories: Seeds, Hypotheses, Strategies, Constraints, Signals, Competitors, Evaluations.
636
637Feature: {}
638Scenario: {}
639Steps:
640{}
641
642Can this scenario be implemented as a Converge Invariant?
643
644Respond with ONLY one of:
645- COMPILABLE: <invariant_class> - brief description of implementation
646- NOT_COMPILABLE: <reason why it cannot be a runtime check>
647- NEEDS_REFACTOR: <suggestion to make it compilable>",
648            feature.name,
649            scenario.name,
650            format_steps(&scenario.steps)
651        );
652
653        let system_prompt =
654            "You are a Rust expert. Be precise about what can be checked at runtime.";
655        let request = LlmRequest::new(prompt.clone())
656            .with_system(system_prompt)
657            .with_max_tokens(200)
658            .with_temperature(0.3);
659
660        eprintln!("\n📤 Compilability Check - Sending to LLM:");
661        eprintln!("   Scenario: {}", scenario.name);
662        eprintln!("   System Prompt: {system_prompt}");
663        eprintln!(
664            "   User Prompt (first 200 chars): {}...",
665            prompt.chars().take(200).collect::<String>()
666        );
667        eprintln!("   Request params: max_tokens=200, temperature=0.3");
668
669        let response = self.provider.complete(&request).map_err(|e| {
670            // LLM API errors are not Gherkin validation issues
671            ValidationError::LlmError(format!("NOT_RELATED_ERROR: LLM API call failed: {e}"))
672        })?;
673
674        eprintln!("\n📥 Compilability Check - Response from LLM:");
675        eprintln!("   Raw response: {}", response.content);
676        eprintln!("   Model: {}", response.model);
677        eprintln!(
678            "   Token usage: prompt={}, completion={}, total={}",
679            response.usage.prompt_tokens,
680            response.usage.completion_tokens,
681            response.usage.total_tokens
682        );
683        eprintln!("   Finish reason: {:?}", response.finish_reason);
684
685        let content = response.content.trim();
686        eprintln!("\n🔍 Compilability Check - Reasoning:");
687
688        if content.starts_with("NOT_COMPILABLE:") {
689            let reason = content.strip_prefix("NOT_COMPILABLE:").unwrap_or("").trim();
690            eprintln!("   → Detected: NOT_COMPILABLE");
691            eprintln!("   → Reason: {reason}");
692            eprintln!("   → Action: Creating Error-level ValidationIssue");
693            Ok(Some(ValidationIssue {
694                location: format!("Scenario: {}", scenario.name),
695                category: IssueCategory::Compilability,
696                severity: Severity::Error,
697                message: format!("Cannot compile to invariant: {reason}"),
698                suggestion: None,
699            }))
700        } else if content.starts_with("NEEDS_REFACTOR:") {
701            let suggestion = content.strip_prefix("NEEDS_REFACTOR:").unwrap_or("").trim();
702            eprintln!("   → Detected: NEEDS_REFACTOR");
703            eprintln!("   → Suggestion: {suggestion}");
704            eprintln!(
705                "   → Action: Creating Warning-level ValidationIssue with refactoring suggestion"
706            );
707            Ok(Some(ValidationIssue {
708                location: format!("Scenario: {}", scenario.name),
709                category: IssueCategory::Compilability,
710                severity: Severity::Warning,
711                message: "Scenario needs refactoring to be compilable".to_string(),
712                suggestion: Some(suggestion.to_string()),
713            }))
714        } else if content.starts_with("COMPILABLE:") {
715            let details = content.strip_prefix("COMPILABLE:").unwrap_or("").trim();
716            eprintln!("   → Detected: COMPILABLE");
717            eprintln!("   → Details: {details}");
718            eprintln!("   → Action: No issue created (scenario is compilable)");
719            Ok(None) // COMPILABLE
720        } else {
721            eprintln!("   → Warning: Response doesn't match expected format");
722            eprintln!("   → Raw response: {content}");
723            eprintln!("   → Action: Treating as COMPILABLE (no issue created)");
724            Ok(None) // Default to compilable if format doesn't match
725        }
726    }
727
728    /// Checks scenario against Converge Gherkin conventions (no LLM needed).
729    fn check_conventions(&self, scenario: &gherkin::Scenario) -> Vec<ValidationIssue> {
730        let mut issues = Vec::new();
731
732        // Check scenario naming convention
733        if scenario.name.is_empty() {
734            issues.push(ValidationIssue {
735                location: "Scenario".to_string(),
736                category: IssueCategory::Convention,
737                severity: Severity::Error,
738                message: "Scenario has no name".to_string(),
739                suggestion: Some("Add a descriptive name".to_string()),
740            });
741        }
742
743        // Check for Given/When/Then structure
744        let has_given = scenario
745            .steps
746            .iter()
747            .any(|s| matches!(s.ty, gherkin::StepType::Given));
748        let has_when = scenario
749            .steps
750            .iter()
751            .any(|s| matches!(s.ty, gherkin::StepType::When));
752        let has_then = scenario
753            .steps
754            .iter()
755            .any(|s| matches!(s.ty, gherkin::StepType::Then));
756
757        if !has_given && !has_when {
758            issues.push(ValidationIssue {
759                location: format!("Scenario: {}", scenario.name),
760                category: IssueCategory::Convention,
761                severity: Severity::Warning,
762                message: "Scenario lacks Given or When steps".to_string(),
763                suggestion: Some("Add preconditions (Given) or actions (When)".to_string()),
764            });
765        }
766
767        if !has_then {
768            issues.push(ValidationIssue {
769                location: format!("Scenario: {}", scenario.name),
770                category: IssueCategory::Convention,
771                severity: Severity::Error,
772                message: "Scenario lacks Then steps (expected outcomes)".to_string(),
773                suggestion: Some(
774                    "Add at least one Then step defining the expected outcome".to_string(),
775                ),
776            });
777        }
778
779        // Check for Converge-specific patterns
780        for step in &scenario.steps {
781            if step.value.contains("should") && matches!(step.ty, gherkin::StepType::Then) {
782                // Good pattern: "Then X should Y"
783            } else if step.value.contains("must") || step.value.contains("always") {
784                // Good pattern for invariants
785            } else if step.value.contains("might") || step.value.contains("maybe") {
786                issues.push(ValidationIssue {
787                    location: format!("Step: {}", step.value),
788                    category: IssueCategory::Convention,
789                    severity: Severity::Warning,
790                    message: "Uncertain language in step ('might', 'maybe')".to_string(),
791                    suggestion: Some("Use definite language for testable assertions".to_string()),
792                });
793            }
794        }
795
796        issues
797    }
798}
799
800/// LLM-powered Gherkin specification generator.
801pub struct SpecGenerator {
802    provider: Arc<dyn LlmProvider>,
803}
804
805impl SpecGenerator {
806    /// Creates a new generator with the given LLM provider.
807    #[must_use]
808    pub fn new(provider: Arc<dyn LlmProvider>) -> Self {
809        Self { provider }
810    }
811
812    /// Generates a Gherkin/Truth specification from free text.
813    ///
814    /// # Errors
815    ///
816    /// Returns error if the LLM API call fails.
817    pub fn generate_from_text(&self, text: &str) -> Result<String, ValidationError> {
818        let prompt = format!(
819            r"You are a requirements engineer for a multi-agent AI system called Converge.
820Convert the following free text into a valid Gherkin/Truth specification.
821
822Free Text:
823{text}
824
825Rules for generation:
8261. Use Converge Truth syntax (`Truth:` instead of `Feature:`).
8272. Include a concise business description immediately after the Truth header.
8283. Ensure at least one scenario is generated.
8294. Each scenario must have Given/When/Then steps.
8305. Use definite language (avoid 'might', 'maybe').
8316. Focus on testable business outcomes.
832
833Return ONLY the Gherkin content, no explanation or preamble.
834
835Example Format:
836Truth: <name>
837  <description line 1>
838  <description line 2>
839
840  Scenario: <name>
841    Given <state>
842    When <action>
843    Then <outcome>"
844        );
845
846        let system_prompt =
847            "You are an expert Gherkin spec writer. Respond with ONLY the specification.";
848        let request = LlmRequest::new(prompt)
849            .with_system(system_prompt)
850            .with_max_tokens(1000)
851            .with_temperature(0.3);
852
853        let response = self
854            .provider
855            .complete(&request)
856            .map_err(|e| ValidationError::LlmError(format!("LLM API call failed: {e}")))?;
857
858        Ok(response.content.trim().to_string())
859    }
860}
861
862/// Formats Gherkin steps for display.
863fn format_steps(steps: &[gherkin::Step]) -> String {
864    steps
865        .iter()
866        .map(|s| format!("{:?} {}", s.keyword, s.value))
867        .collect::<Vec<_>>()
868        .join("\n")
869}
870
/// Failure modes of Gherkin validation and generation.
///
/// Every variant carries a human-readable message describing the failure.
#[derive(Clone, Debug)]
pub enum ValidationError {
    /// The Gherkin content could not be parsed.
    ParseError(String),
    /// Reading the specification file failed.
    IoError(String),
    /// The call to the LLM provider failed.
    LlmError(String),
}
881
882impl std::fmt::Display for ValidationError {
883    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
884        match self {
885            Self::ParseError(msg) => write!(f, "Parse error: {msg}"),
886            Self::IoError(msg) => write!(f, "IO error: {msg}"),
887            Self::LlmError(msg) => write!(f, "LLM error: {msg}"),
888        }
889    }
890}
891
// Marker impl: only the trait's default methods are used, so a
// `ValidationError` reports no underlying source error.
impl std::error::Error for ValidationError {}
893
894#[cfg(test)]
895mod tests {
896    use super::*;
897    use converge_core::llm::{MockProvider, MockResponse};
898
899    fn mock_valid_provider() -> Arc<dyn LlmProvider> {
900        Arc::new(MockProvider::new(vec![
901            MockResponse::success("VALID", 0.9),
902            MockResponse::success("COMPILABLE: Acceptance - check strategy count", 0.9),
903        ]))
904    }
905
906    #[test]
907    fn preprocess_converts_truth_to_feature() {
908        let input = "Truth: Get paid for delivered work\n  Scenario: Invoice";
909        let output = preprocess_truths(input);
910        assert!(output.starts_with("Feature:"));
911        assert!(output.contains("Scenario: Invoice"));
912    }
913
914    #[test]
915    fn preprocess_preserves_feature_keyword() {
916        let input = "Feature: Standard Gherkin\n  Scenario: Test";
917        let output = preprocess_truths(input);
918        assert_eq!(input, output);
919    }
920
921    #[test]
922    fn validation_config_default() {
923        let config = ValidationConfig::default();
924        assert!(config.check_conventions);
925        assert!(config.check_business_sense);
926        assert!(config.check_compilability);
927        assert_eq!(config.min_confidence, 0.7);
928    }
929
930    #[test]
931    fn validation_config_custom() {
932        let config = ValidationConfig {
933            check_business_sense: false,
934            min_confidence: 0.9,
935            ..ValidationConfig::default()
936        };
937        assert!(!config.check_business_sense);
938        assert_eq!(config.min_confidence, 0.9);
939        assert!(config.check_conventions);
940    }
941
942    #[test]
943    fn validates_truth_syntax() {
944        let content = r"
945Truth: Get paid for delivered work
946  Scenario: Invoice and collect
947    Given work is marked as delivered
948    When the system converges
949    Then invoice is issued
950";
951
952        let validator = GherkinValidator::new(mock_valid_provider(), ValidationConfig::default());
953
954        let result = validator.validate(content, "money.truth").unwrap();
955
956        assert_eq!(result.scenario_count, 1);
957        // Should parse successfully with Truth: syntax and .truth extension
958    }
959
960    #[test]
961    fn validates_simple_feature() {
962        let content = r"
963Feature: Growth Strategy Validation
964  Scenario: Multiple strategies required
965    When the system converges
966    Then at least two distinct growth strategies exist
967";
968
969        let validator = GherkinValidator::new(mock_valid_provider(), ValidationConfig::default());
970
971        let result = validator.validate(content, "test.feature").unwrap();
972
973        assert_eq!(result.scenario_count, 1);
974        // May have convention warnings but should be parseable
975    }
976
977    #[test]
978    fn detects_missing_then() {
979        let content = r"
980Feature: Bad Spec
981  Scenario: No assertions
982    Given some precondition
983    When something happens
984";
985
986        let validator = GherkinValidator::new(
987            mock_valid_provider(),
988            ValidationConfig {
989                check_business_sense: false,
990                check_compilability: false,
991                check_conventions: true,
992                min_confidence: 0.7,
993            },
994        );
995
996        let result = validator.validate(content, "bad.feature").unwrap();
997
998        assert!(result.has_errors());
999        assert!(
1000            result
1001                .issues
1002                .iter()
1003                .any(|i| i.category == IssueCategory::Convention && i.message.contains("Then"))
1004        );
1005    }
1006
1007    #[test]
1008    fn detects_uncertain_language() {
1009        let content = r"
1010Feature: Uncertain Spec
1011  Scenario: Maybe works
1012    When something happens
1013    Then it might succeed
1014";
1015
1016        let validator = GherkinValidator::new(
1017            mock_valid_provider(),
1018            ValidationConfig {
1019                check_business_sense: false,
1020                check_compilability: false,
1021                check_conventions: true,
1022                min_confidence: 0.7,
1023            },
1024        );
1025
1026        let result = validator.validate(content, "uncertain.feature").unwrap();
1027
1028        assert!(result.has_warnings());
1029        assert!(result.issues.iter().any(|i| i.message.contains("might")));
1030    }
1031
1032    #[test]
1033    fn handles_llm_invalid_response() {
1034        let provider = Arc::new(MockProvider::new(vec![
1035            MockResponse::success("INVALID: The scenario describes an untestable state", 0.8),
1036            MockResponse::success("COMPILABLE: Acceptance", 0.9),
1037        ]));
1038
1039        let content = r"
1040Feature: Test
1041  Scenario: Bad business logic
1042    When magic happens
1043    Then everything is perfect forever
1044";
1045
1046        let validator = GherkinValidator::new(provider, ValidationConfig::default());
1047
1048        let result = validator.validate(content, "test.feature").unwrap();
1049
1050        assert!(
1051            result.issues.iter().any(
1052                |i| i.category == IssueCategory::BusinessSense && i.severity == Severity::Error
1053            )
1054        );
1055    }
1056
1057    #[test]
1058    fn generates_spec_from_text() {
1059        let mock_spec = "Truth: Test\n  Scenario: Test\n    Given X\n    Then Y";
1060        let provider = Arc::new(MockProvider::new(vec![MockResponse::success(
1061            mock_spec, 0.9,
1062        )]));
1063
1064        let generator = SpecGenerator::new(provider);
1065        let result = generator.generate_from_text("Make a test spec").unwrap();
1066
1067        assert_eq!(result, mock_spec);
1068    }
1069
1070    // =========================================================================
1071    // Tag Extraction Tests
1072    // =========================================================================
1073
1074    #[test]
1075    fn extract_invariant_structural_tags() {
1076        let tags = vec![
1077            "invariant".to_string(),
1078            "structural".to_string(),
1079            "id:brand_safety".to_string(),
1080        ];
1081        let meta = extract_scenario_meta("Strategies must not contain brand-unsafe terms", &tags);
1082
1083        assert_eq!(meta.kind, Some(ScenarioKind::Invariant));
1084        assert_eq!(meta.invariant_class, Some(InvariantClassTag::Structural));
1085        assert_eq!(meta.id.as_deref(), Some("brand_safety"));
1086        assert!(!meta.is_test);
1087        assert!(meta.provider.is_none());
1088    }
1089
1090    #[test]
1091    fn extract_invariant_acceptance_tags() {
1092        let tags = vec![
1093            "invariant".to_string(),
1094            "acceptance".to_string(),
1095            "id:require_multiple_strategies".to_string(),
1096        ];
1097        let meta = extract_scenario_meta("At least 2 strategies must exist", &tags);
1098
1099        assert_eq!(meta.kind, Some(ScenarioKind::Invariant));
1100        assert_eq!(meta.invariant_class, Some(InvariantClassTag::Acceptance));
1101        assert_eq!(meta.id.as_deref(), Some("require_multiple_strategies"));
1102    }
1103
1104    #[test]
1105    fn extract_invariant_semantic_tags() {
1106        let tags = vec![
1107            "invariant".to_string(),
1108            "semantic".to_string(),
1109            "id:require_evaluation_rationale".to_string(),
1110        ];
1111        let meta = extract_scenario_meta("Evaluations must include score", &tags);
1112
1113        assert_eq!(meta.kind, Some(ScenarioKind::Invariant));
1114        assert_eq!(meta.invariant_class, Some(InvariantClassTag::Semantic));
1115        assert_eq!(meta.id.as_deref(), Some("require_evaluation_rationale"));
1116    }
1117
1118    #[test]
1119    fn extract_validation_tags() {
1120        let tags = vec![
1121            "validation".to_string(),
1122            "id:confidence_threshold".to_string(),
1123        ];
1124        let meta = extract_scenario_meta("Proposals must meet confidence threshold", &tags);
1125
1126        assert_eq!(meta.kind, Some(ScenarioKind::Validation));
1127        assert!(meta.invariant_class.is_none());
1128        assert_eq!(meta.id.as_deref(), Some("confidence_threshold"));
1129    }
1130
1131    #[test]
1132    fn extract_agent_llm_tags() {
1133        let tags = vec![
1134            "agent".to_string(),
1135            "llm".to_string(),
1136            "id:market_signal".to_string(),
1137        ];
1138        let meta = extract_scenario_meta("Market Signal agent proposes Signals", &tags);
1139
1140        assert_eq!(meta.kind, Some(ScenarioKind::Agent));
1141        assert_eq!(meta.provider.as_deref(), Some("llm"));
1142        assert_eq!(meta.id.as_deref(), Some("market_signal"));
1143    }
1144
1145    #[test]
1146    fn extract_e2e_test_tags() {
1147        let tags = vec!["e2e".to_string(), "test".to_string()];
1148        let meta = extract_scenario_meta("Pack converges from Seeds", &tags);
1149
1150        assert_eq!(meta.kind, Some(ScenarioKind::EndToEnd));
1151        assert!(meta.is_test);
1152        assert!(meta.id.is_none());
1153    }
1154
1155    #[test]
1156    fn extract_with_at_prefix() {
1157        // gherkin crate may include @ prefix — we handle both
1158        let tags = vec![
1159            "@invariant".to_string(),
1160            "@structural".to_string(),
1161            "@id:brand_safety".to_string(),
1162        ];
1163        let meta = extract_scenario_meta("Test with @ prefix", &tags);
1164
1165        assert_eq!(meta.kind, Some(ScenarioKind::Invariant));
1166        assert_eq!(meta.invariant_class, Some(InvariantClassTag::Structural));
1167        assert_eq!(meta.id.as_deref(), Some("brand_safety"));
1168    }
1169
1170    #[test]
1171    fn extract_no_tags() {
1172        let meta = extract_scenario_meta("Untagged scenario", &[]);
1173
1174        assert!(meta.kind.is_none());
1175        assert!(meta.invariant_class.is_none());
1176        assert!(meta.id.is_none());
1177        assert!(!meta.is_test);
1178    }
1179
1180    #[test]
1181    fn extract_unknown_tags_preserved() {
1182        let tags = vec![
1183            "custom".to_string(),
1184            "invariant".to_string(),
1185            "id:test".to_string(),
1186        ];
1187        let meta = extract_scenario_meta("With custom tag", &tags);
1188
1189        assert_eq!(meta.raw_tags.len(), 3);
1190        assert_eq!(meta.kind, Some(ScenarioKind::Invariant));
1191    }
1192
1193    #[test]
1194    fn extract_all_metas_from_truth_file() {
1195        let content = r#"
1196Truth: Growth Strategy Pack
1197  Multi-agent growth strategy analysis.
1198
1199  @invariant @structural @id:brand_safety
1200  Scenario: Strategies must not contain brand-unsafe terms
1201    Given any fact under key "Strategies"
1202    Then it must not contain forbidden terms
1203
1204  @invariant @acceptance @id:require_multiple_strategies
1205  Scenario: At least 2 strategies must exist at convergence
1206    Given the engine halts with reason "Converged"
1207    Then the Context key "Strategies" contains at least 2 facts
1208
1209  @agent @llm @id:market_signal
1210  Scenario: Market Signal agent proposes Signals from Seeds
1211    Given the Context contains facts under key "Seeds"
1212    When agent "market_signal" executes
1213    Then it proposes facts under key "Signals"
1214
1215  @e2e @test
1216  Scenario: Pack converges from Seeds to evaluated Strategies
1217    Given seed facts are present
1218    When the pack runs to convergence
1219    Then all invariants pass
1220"#;
1221
1222        let metas = extract_all_metas(content).unwrap();
1223        assert_eq!(metas.len(), 4);
1224
1225        // First: structural invariant
1226        assert_eq!(metas[0].kind, Some(ScenarioKind::Invariant));
1227        assert_eq!(
1228            metas[0].invariant_class,
1229            Some(InvariantClassTag::Structural)
1230        );
1231        assert_eq!(metas[0].id.as_deref(), Some("brand_safety"));
1232
1233        // Second: acceptance invariant
1234        assert_eq!(metas[1].kind, Some(ScenarioKind::Invariant));
1235        assert_eq!(
1236            metas[1].invariant_class,
1237            Some(InvariantClassTag::Acceptance)
1238        );
1239        assert_eq!(metas[1].id.as_deref(), Some("require_multiple_strategies"));
1240
1241        // Third: agent
1242        assert_eq!(metas[2].kind, Some(ScenarioKind::Agent));
1243        assert_eq!(metas[2].provider.as_deref(), Some("llm"));
1244
1245        // Fourth: e2e test
1246        assert_eq!(metas[3].kind, Some(ScenarioKind::EndToEnd));
1247        assert!(metas[3].is_test);
1248    }
1249
1250    #[test]
1251    fn validator_populates_scenario_metas() {
1252        let content = r#"
1253Truth: Test
1254  @invariant @structural @id:test_inv
1255  Scenario: Test invariant
1256    Given precondition
1257    When action occurs
1258    Then outcome is verified
1259"#;
1260
1261        let validator = GherkinValidator::new(mock_valid_provider(), ValidationConfig::default());
1262        let result = validator.validate(content, "test.truth").unwrap();
1263
1264        assert_eq!(result.scenario_metas.len(), 1);
1265        assert_eq!(result.scenario_metas[0].kind, Some(ScenarioKind::Invariant));
1266        assert_eq!(result.scenario_metas[0].id.as_deref(), Some("test_inv"));
1267    }
1268
1269    // =========================================================================
1270    // Negative tests for tag extraction
1271    // =========================================================================
1272
1273    #[test]
1274    fn extract_meta_invariant_without_class() {
1275        // Invariant without a class tag should still be recognized as invariant
1276        let tags = vec!["invariant".to_string(), "id:no_class".to_string()];
1277        let meta = extract_scenario_meta("Invariant without class", &tags);
1278
1279        assert_eq!(meta.kind, Some(ScenarioKind::Invariant));
1280        assert!(meta.invariant_class.is_none()); // no class is valid — pipeline may warn later
1281    }
1282
1283    #[test]
1284    fn extract_meta_class_without_kind() {
1285        // Class without invariant kind — should preserve class but no kind
1286        let tags = vec!["structural".to_string()];
1287        let meta = extract_scenario_meta("Orphan class", &tags);
1288
1289        assert!(meta.kind.is_none());
1290        assert_eq!(meta.invariant_class, Some(InvariantClassTag::Structural));
1291    }
1292
1293    #[test]
1294    fn extract_meta_empty_id() {
1295        // @id: with no value after colon
1296        let tags = vec!["invariant".to_string(), "id:".to_string()];
1297        let meta = extract_scenario_meta("Empty id", &tags);
1298
1299        assert_eq!(meta.id.as_deref(), Some(""));
1300    }
1301
1302    #[test]
1303    fn extract_all_metas_parse_error() {
1304        let bad = "This is not valid Gherkin at all";
1305        let result = extract_all_metas(bad);
1306        assert!(result.is_err());
1307    }
1308
1309    mod property_tests {
1310        use super::*;
1311        use proptest::prelude::*;
1312
1313        proptest! {
1314            #[test]
1315            fn preprocess_never_crashes(s in "\\PC*") {
1316                let _ = preprocess_truths(&s);
1317            }
1318
1319            #[test]
1320            fn truth_to_feature_conversion(s in ".*Truth:.*") {
1321                let _output = preprocess_truths(&s);
1322                // If the line started with Truth:, it should now start with Feature:
1323                // Note: preprocess_truths uses Regex with (?m)^(\s*)Truth:
1324                // We should check if the conversion happened for lines meeting the pattern
1325            }
1326
1327            #[test]
1328            fn idempotency_of_feature(s in ".*Feature:.*") {
1329                // If it already has Feature:, it shouldn't change to something else incorrect
1330                // specifically, it shouldn't contain "Truth:" where "Feature:" was
1331                if !s.contains("Truth:") {
1332                    let output = preprocess_truths(&s);
1333                    assert_eq!(s, output);
1334                }
1335            }
1336
1337            #[test]
1338            fn extract_meta_never_crashes(
1339                name in "\\PC{0,100}",
1340                tags in proptest::collection::vec("[a-z:_@]{1,30}", 0..10)
1341            ) {
1342                let _ = extract_scenario_meta(&name, &tags);
1343            }
1344
1345            #[test]
1346            fn extract_meta_preserves_all_raw_tags(
1347                tags in proptest::collection::vec("[a-z]{1,10}", 0..5)
1348            ) {
1349                let meta = extract_scenario_meta("test", &tags);
1350                assert_eq!(meta.raw_tags.len(), tags.len());
1351            }
1352
1353            #[test]
1354            fn extract_meta_id_always_from_id_prefix(
1355                suffix in "[a-z_]{1,20}"
1356            ) {
1357                let tags = vec![format!("id:{suffix}")];
1358                let meta = extract_scenario_meta("test", &tags);
1359                assert_eq!(meta.id.as_deref(), Some(suffix.as_str()));
1360            }
1361        }
1362    }
1363}
converge_tool/gherkin.rs

converge_tool/
gherkin.rs