use regex::Regex;
/// A single check produced by `parse_steps` from a Given/Then/And step.
///
/// Wherever a variant has a `key` field, the parser may store an empty string
/// when neither the step text nor an earlier `Given` step supplied a key.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Predicate {
/// Built when step text matches `contains N facts` / `at least N facts`; `min` is `N`.
CountAtLeast { key: String, min: usize },
/// Built when step text matches `at most N facts`; `max` is `N`.
CountAtMost { key: String, max: usize },
/// Built when step text contains `must not contain`; `forbidden` comes from the step's table.
ContentMustNotContain {
key: String,
forbidden: Vec<ForbiddenTerm>,
},
/// Built when step text matches `must contain (a) field "<name>"`; `required_field` is `<name>`.
ContentMustContain { key: String, required_field: String },
/// Built when step text matches `for each <source> fact ... exists (a|an) <target> fact`.
/// Both keys are the words captured from the text; the parser does not check them
/// against `KNOWN_KEYS`.
CrossReference {
source_key: String,
target_key: String,
},
/// Built from a `Given` step that names a known key and mentions `any fact` or `facts`.
HasFacts { key: String },
/// Built when step text contains `must include` / `must contain a field` and the step
/// has a non-empty table; `fields` comes from the table rows.
RequiredFields {
key: String,
fields: Vec<FieldRequirement>,
},
/// Fallback for step text that matched no known pattern; holds the raw step text.
Custom { description: String },
}
/// One row of a "must not contain" table, as built by `parse_forbidden_terms`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ForbiddenTerm {
/// First cell of the table row.
pub term: String,
/// Second cell of the table row.
pub reason: String,
}
/// One row of a "must include" table, as built by `parse_field_requirements`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct FieldRequirement {
/// First cell of the table row.
pub field: String,
/// Second cell of the table row, stored as free text; this file does not interpret it.
pub rule: String,
}
/// Errors reported while turning steps into predicates.
#[derive(Debug, Clone)]
pub enum PredicateError {
/// A key was not found in `KNOWN_KEYS`; carries the offending key.
/// Displayed as `unknown context key: <key>`.
UnknownContextKey(String),
/// Carries a free-form message. Displayed as `parse error: <msg>`.
ParseError(String),
}
impl std::fmt::Display for PredicateError {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
Self::UnknownContextKey(key) => write!(f, "unknown context key: {key}"),
Self::ParseError(msg) => write!(f, "parse error: {msg}"),
}
}
}
// Empty impl: every `std::error::Error` method keeps its default implementation.
impl std::error::Error for PredicateError {}
/// Context keys accepted by `is_valid_key`. Matching is exact string equality,
/// so it is case-sensitive.
const KNOWN_KEYS: &[&str] = &[
"Seeds",
"Hypotheses",
"Strategies",
"Constraints",
"Signals",
"Competitors",
"Evaluations",
];
/// Returns `true` when `key` is exactly equal to one of the entries in `KNOWN_KEYS`.
fn is_valid_key(key: &str) -> bool {
    KNOWN_KEYS.iter().any(|known| *known == key)
}
/// Converts a sequence of `(step_type, text, table)` steps into predicates.
///
/// Steps are processed in order and a "current key" is carried between them:
///
/// * `"Given"`: skipped when the text contains `engine halts` or `engine is`.
///   Otherwise, if the first quoted word in the text is a known key, it becomes
///   the current key, and a [`Predicate::HasFacts`] is emitted when the text also
///   mentions `any fact` or `facts`. Unknown or missing keys are silently ignored.
/// * `"Then"`: always handed to `parse_then_step` together with the current key.
/// * `"And"`: handed to `parse_then_step` only when the text contains
///   `must include` or `must contain`; every other `And` step is ignored.
/// * Any other step type is ignored.
///
/// # Errors
///
/// Propagates the first [`PredicateError`] returned by `parse_then_step`.
pub fn parse_steps(
    steps: &[(&str, &str, Vec<Vec<String>>)],
) -> Result<Vec<Predicate>, PredicateError> {
    let mut parsed = Vec::new();
    let mut active_key: Option<String> = None;

    for (kind, text, table) in steps {
        match *kind {
            "Given" => {
                // Engine-state preconditions carry no context key.
                let about_engine = text.contains("engine halts") || text.contains("engine is");
                if about_engine {
                    continue;
                }

                let Some(key) = extract_context_key(text).filter(|k| is_valid_key(k)) else {
                    continue;
                };

                let mentions_facts = text.contains("any fact") || text.contains("facts");
                if mentions_facts {
                    parsed.push(Predicate::HasFacts { key: key.clone() });
                }
                // Later Then/And steps that name no key fall back to this one.
                active_key = Some(key);
            }
            "Then" => {
                parsed.push(parse_then_step(text, table, active_key.as_ref())?);
            }
            "And" if text.contains("must include") || text.contains("must contain") => {
                parsed.push(parse_then_step(text, table, active_key.as_ref())?);
            }
            _ => {}
        }
    }

    Ok(parsed)
}
fn parse_then_step(
text: &str,
table: &[Vec<String>],
current_key: Option<&String>,
) -> Result<Predicate, PredicateError> {
let count_at_least = Regex::new(r"(?:contains?|at least)\s+(\d+)\s+facts?").unwrap();
if let Some(caps) = count_at_least.captures(text) {
let min: usize = caps[1].parse().unwrap_or(1);
let key = extract_context_key(text)
.or_else(|| current_key.cloned())
.unwrap_or_default();
if !key.is_empty() {
validate_key(&key)?;
}
return Ok(Predicate::CountAtLeast { key, min });
}
let count_at_most = Regex::new(r"at most\s+(\d+)\s+facts?").unwrap();
if let Some(caps) = count_at_most.captures(text) {
let max: usize = caps[1].parse().unwrap_or(1);
let key = extract_context_key(text)
.or_else(|| current_key.cloned())
.unwrap_or_default();
if !key.is_empty() {
validate_key(&key)?;
}
return Ok(Predicate::CountAtMost { key, max });
}
if text.contains("must not contain") {
let key = current_key.cloned().unwrap_or_default();
let forbidden = parse_forbidden_terms(table);
return Ok(Predicate::ContentMustNotContain { key, forbidden });
}
let cross_ref =
Regex::new(r"for each\s+(\w+)\s+fact.*?exists?\s+(?:a |an )?(\w+)\s+fact").unwrap();
if let Some(caps) = cross_ref.captures(text) {
let source_key = caps[1].to_string();
let target_key = caps[2].to_string();
return Ok(Predicate::CrossReference {
source_key,
target_key,
});
}
if (text.contains("must include") || text.contains("must contain a field")) && !table.is_empty()
{
let key = current_key.cloned().unwrap_or_default();
let fields = parse_field_requirements(table);
return Ok(Predicate::RequiredFields { key, fields });
}
let field_pattern = Regex::new(r#"must contain (?:a )?field\s+"(\w+)""#).unwrap();
if let Some(caps) = field_pattern.captures(text) {
let key = current_key.cloned().unwrap_or_default();
return Ok(Predicate::ContentMustContain {
key,
required_field: caps[1].to_string(),
});
}
Ok(Predicate::Custom {
description: text.to_string(),
})
}
fn extract_context_key(text: &str) -> Option<String> {
let re = Regex::new(r#""(\w+)""#).unwrap();
re.captures(text).map(|caps| caps[1].to_string())
}
/// Checks `key` against the known context keys.
///
/// # Errors
///
/// Returns [`PredicateError::UnknownContextKey`] carrying a copy of `key` when
/// `is_valid_key` rejects it.
fn validate_key(key: &str) -> Result<(), PredicateError> {
    if !is_valid_key(key) {
        return Err(PredicateError::UnknownContextKey(key.to_string()));
    }
    Ok(())
}
/// Builds one [`ForbiddenTerm`] per table row that has at least two cells.
///
/// The first cell becomes `term` and the second `reason`. Extra cells are
/// ignored, and rows with fewer than two cells are skipped.
fn parse_forbidden_terms(table: &[Vec<String>]) -> Vec<ForbiddenTerm> {
    table
        .iter()
        .filter_map(|row| match row.as_slice() {
            [term, reason, ..] => Some(ForbiddenTerm {
                term: term.clone(),
                reason: reason.clone(),
            }),
            _ => None,
        })
        .collect()
}
/// Builds one [`FieldRequirement`] per table row that has at least two cells.
///
/// The first cell becomes `field` and the second `rule`. Extra cells are
/// ignored, and rows with fewer than two cells are skipped.
fn parse_field_requirements(table: &[Vec<String>]) -> Vec<FieldRequirement> {
    let mut requirements = Vec::new();
    for row in table {
        if let [field, rule, ..] = row.as_slice() {
            requirements.push(FieldRequirement {
                field: field.clone(),
                rule: rule.clone(),
            });
        }
    }
    requirements
}
/// Collects the distinct context keys referenced by `predicates`, in sorted order.
///
/// * Keyed predicates contribute their `key` unless it is empty.
/// * [`Predicate::CrossReference`] contributes both `source_key` and `target_key`
///   (no emptiness check is applied to these).
/// * [`Predicate::Custom`] contributes nothing.
pub fn extract_dependencies(predicates: &[Predicate]) -> Vec<String> {
    // An ordered set gives de-duplication and sorted output in one structure.
    let mut unique: std::collections::BTreeSet<&str> = std::collections::BTreeSet::new();

    for predicate in predicates {
        match predicate {
            Predicate::Custom { .. } => {}
            Predicate::CrossReference {
                source_key,
                target_key,
            } => {
                unique.insert(source_key.as_str());
                unique.insert(target_key.as_str());
            }
            Predicate::HasFacts { key }
            | Predicate::CountAtLeast { key, .. }
            | Predicate::CountAtMost { key, .. }
            | Predicate::ContentMustNotContain { key, .. }
            | Predicate::ContentMustContain { key, .. }
            | Predicate::RequiredFields { key, .. } => {
                if !key.is_empty() {
                    unique.insert(key.as_str());
                }
            }
        }
    }

    unique.into_iter().map(String::from).collect()
}
// Unit and property tests for step parsing and dependency extraction.
#[cfg(test)]
mod tests {
use super::*;
// A `Then` step naming a known key and a count yields `CountAtLeast` with that key.
#[test]
fn parse_count_at_least() {
let steps = vec![(
"Then",
r#"the Context key "Strategies" contains at least 2 facts"#,
vec![],
)];
let preds = parse_steps(&steps).unwrap();
assert_eq!(preds.len(), 1);
assert!(matches!(
&preds[0],
Predicate::CountAtLeast { key, min: 2 } if key == "Strategies"
));
}
// An "engine halts" `Given` step is skipped, so the only predicate comes from `Then`.
#[test]
fn parse_count_at_least_with_given_context() {
let steps = vec![
(
"Given",
r#"the engine halts with reason "Converged""#,
vec![],
),
(
"Then",
r#"the Context key "Strategies" contains at least 2 facts"#,
vec![],
),
];
let preds = parse_steps(&steps).unwrap();
assert!(matches!(
&preds[0],
Predicate::CountAtLeast { key, min: 2 } if key == "Strategies"
));
}
// `Given` sets the key and emits `HasFacts`; the `Then` table rows become forbidden terms.
#[test]
fn parse_forbidden_terms_with_table() {
let steps = vec![
("Given", r#"any fact under key "Strategies""#, vec![]),
(
"Then",
"it must not contain any forbidden term:",
vec![
vec!["spam".to_string(), "illegal marketing".to_string()],
vec!["bot army".to_string(), "fake engagement".to_string()],
],
),
];
let preds = parse_steps(&steps).unwrap();
assert_eq!(preds.len(), 2);
assert!(matches!(&preds[0], Predicate::HasFacts { key } if key == "Strategies"));
match &preds[1] {
Predicate::ContentMustNotContain { key, forbidden } => {
assert_eq!(key, "Strategies");
assert_eq!(forbidden.len(), 2);
assert_eq!(forbidden[0].term, "spam");
assert_eq!(forbidden[1].reason, "fake engagement");
}
_ => panic!("expected ContentMustNotContain"),
}
}
// Cross-reference keys are the words captured from the text, taken verbatim.
#[test]
fn parse_cross_reference() {
let steps = vec![(
"Then",
"for each Strategy fact there exists an Evaluation fact referencing it",
vec![],
)];
let preds = parse_steps(&steps).unwrap();
assert_eq!(preds.len(), 1);
assert!(matches!(
&preds[0],
Predicate::CrossReference { source_key, target_key }
if source_key == "Strategy" && target_key == "Evaluation"
));
}
// "must include" plus a non-empty table yields `RequiredFields` under the `Given` key.
#[test]
fn parse_required_fields_with_table() {
let steps = vec![
("Given", r#"any fact under key "Evaluations""#, vec![]),
(
"Then",
"it must include:",
vec![
vec!["score".to_string(), "integer between 0..100".to_string()],
vec!["rationale".to_string(), "non-empty string".to_string()],
],
),
];
let preds = parse_steps(&steps).unwrap();
assert_eq!(preds.len(), 2); match &preds[1] {
Predicate::RequiredFields { key, fields } => {
assert_eq!(key, "Evaluations");
assert_eq!(fields.len(), 2);
assert_eq!(fields[0].field, "score");
assert_eq!(fields[1].field, "rationale");
}
_ => panic!("expected RequiredFields"),
}
}
// Without a table, `must contain a field "<name>"` yields `ContentMustContain`.
#[test]
fn parse_content_must_contain_field() {
let steps = vec![
("Given", r#"any fact under key "Strategies""#, vec![]),
(
"Then",
r#"it must contain a field "compliance_ref" with a non-empty value"#,
vec![],
),
];
let preds = parse_steps(&steps).unwrap();
assert!(matches!(
&preds[1],
Predicate::ContentMustContain { key, required_field }
if key == "Strategies" && required_field == "compliance_ref"
));
}
// A `Given` step mentioning "facts" under a known key yields `HasFacts`.
#[test]
fn parse_has_facts() {
let steps = vec![(
"Given",
r#"the Context contains facts under key "Signals""#,
vec![],
)];
let preds = parse_steps(&steps).unwrap();
assert_eq!(preds.len(), 1);
assert!(matches!(&preds[0], Predicate::HasFacts { key } if key == "Signals"));
}
// `Then` text matching no pattern falls back to `Custom` holding the raw text.
#[test]
fn unrecognized_step_becomes_custom() {
let steps = vec![("Then", "something completely different happens", vec![])];
let preds = parse_steps(&steps).unwrap();
assert_eq!(preds.len(), 1);
assert!(
matches!(&preds[0], Predicate::Custom { description } if description.contains("completely different"))
);
}
// A count step naming a key outside `KNOWN_KEYS` is an error.
#[test]
fn unknown_context_key_in_then_step_error() {
let steps = vec![(
"Then",
r#"the Context key "Widgets" contains at least 2 facts"#,
vec![],
)];
let result = parse_steps(&steps);
assert!(result.is_err());
assert!(matches!(
result.unwrap_err(),
PredicateError::UnknownContextKey(k) if k == "Widgets"
));
}
// An unknown key in a `Given` step is dropped silently rather than reported.
#[test]
fn unknown_key_in_given_is_ignored() {
let steps = vec![("Given", r#"any fact under key "Widgets""#, vec![])];
let result = parse_steps(&steps);
assert!(result.is_ok());
assert!(result.unwrap().is_empty());
}
// No steps in, no predicates out.
#[test]
fn empty_steps_produces_no_predicates() {
let steps: Vec<(&str, &str, Vec<Vec<String>>)> = vec![];
let preds = parse_steps(&steps).unwrap();
assert!(preds.is_empty());
}
// Dependencies come back sorted and include both sides of a cross-reference.
#[test]
fn extract_deps_from_predicates() {
let preds = vec![
Predicate::CountAtLeast {
key: "Strategies".to_string(),
min: 2,
},
Predicate::CrossReference {
source_key: "Strategies".to_string(),
target_key: "Evaluations".to_string(),
},
Predicate::HasFacts {
key: "Seeds".to_string(),
},
];
let deps = extract_dependencies(&preds);
assert_eq!(deps, vec!["Evaluations", "Seeds", "Strategies"]);
}
// The same key referenced by several predicates appears once.
#[test]
fn extract_deps_deduplicates() {
let preds = vec![
Predicate::HasFacts {
key: "Strategies".to_string(),
},
Predicate::CountAtLeast {
key: "Strategies".to_string(),
min: 1,
},
];
let deps = extract_dependencies(&preds);
assert_eq!(deps, vec!["Strategies"]);
}
// `Custom` predicates carry no key and so contribute no dependency.
#[test]
fn custom_predicates_have_no_deps() {
let preds = vec![Predicate::Custom {
description: "something".to_string(),
}];
let deps = extract_dependencies(&preds);
assert!(deps.is_empty());
}
// Property-based tests (proptest).
mod property_tests {
use super::*;
use proptest::prelude::*;
proptest! {
// Generated text is fed through a `Then` step; the result is discarded, so
// this only fails if parsing panics.
#[test]
fn any_step_produces_predicate_or_error(text in "\\PC{1,100}") {
let steps = vec![("Then", text.as_str(), vec![])];
let _ = parse_steps(&steps);
}
// For every known key and every n in 1..1000, the count round-trips into `min`.
#[test]
fn count_pattern_always_parses(n in 1usize..1000, key in prop::sample::select(KNOWN_KEYS)) {
let text = format!(r#"the Context key "{key}" contains at least {n} facts"#);
let steps = vec![("Then", text.as_str(), vec![])];
let preds = parse_steps(&steps).unwrap();
assert!(matches!(&preds[0], Predicate::CountAtLeast { min, .. } if *min == n));
}
// Generated keys are passed to `extract_dependencies`; the result is discarded,
// so this only fails on a panic.
#[test]
fn dependency_extraction_never_crashes(
keys in proptest::collection::vec("[A-Z][a-z]{3,10}", 0..5)
) {
let preds: Vec<Predicate> = keys.iter().map(|k| Predicate::HasFacts { key: k.clone() }).collect();
let _ = extract_dependencies(&preds);
}
}
}
}