1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
//! Tolerant parsing helpers for XSD validation.
//!
//! This module provides functionality for tolerant (fuzzy) parsing of XML content
//! that is semantically correct but may differ in minor structural ways from
//! the exact schema layout.
//!
//! # Tolerant Behavior
//!
//! The tolerant parsing helpers allow:
//! - **Enum normalization**: Status values that clearly correspond to expected outcomes
//! are accepted even when using synonyms or different casing (e.g., "done" → "completed").
//! - **Tag name fuzzy matching**: Minor typos in tag names (e.g., `<ralph-sumary>` instead of
//! `<ralph-summary>`) are accepted if they unambiguously resolve to a known tag.
//! - **Unknown element skipping**: Unknown child elements are skipped instead of causing
//! validation failure. Required elements are still enforced.
//! - **Stray text tolerance**: Whitespace or non-semantic text between elements is ignored.
//!
//! # Rejection Boundary
//!
//! Truly ambiguous or incompatible responses are still rejected:
//! - Values that match neither a canonical value (case-insensitively) nor a synonym-table
//! entry return `None` from `normalize_enum_value`
//! - Empty values are rejected
//! - Values that could plausibly map to multiple different outcomes are not added to tables
//! - Tag names with ambiguous fuzzy matches (multiple known tags within edit-distance threshold)
//! return `None` and are skipped as unknown elements
//!
//! # Synonym Tables
//!
//! The synonym tables contain only conservative, unambiguous mappings. Each mapping
//! should only be added when the intent is clearly unambiguous (e.g., "done" clearly
//! means "completed", not "partial").
/// Synonym mappings for development result status values.
///
/// Each tuple is `(synonym, canonical_value)` where `synonym` is a non-canonical
/// value that unambiguously maps to `canonical_value`.
///
/// Canonical values: `"completed"`, `"partial"`, `"failed"`
///
/// The `normalize_enum_value` usage example documents `"done"` resolving to
/// `"completed"` when this table is passed as the `synonyms` argument.
// NOTE(review): the declaration below reads `& = &`, which is not valid Rust; the
// slice element type and the table entries appear to have been lost. Given the
// tuple description above, the type is presumably `&[(&str, &str)]` — TODO confirm
// and restore the entries from the original source.
pub const DEVELOPMENT_STATUS_SYNONYMS: & = &;
/// Synonym mappings for fix result status values.
///
/// Each tuple is `(synonym, canonical_value)` where `synonym` is a non-canonical
/// value that unambiguously maps to `canonical_value`.
///
/// Canonical values: `"all_issues_addressed"`, `"issues_remain"`, `"no_issues_found"`
// NOTE(review): the declaration below reads `& = &`, which is not valid Rust; the
// slice element type and the table entries appear to have been lost. Given the
// tuple description above, the type is presumably `&[(&str, &str)]` — TODO confirm
// and restore the entries from the original source.
pub const FIX_STATUS_SYNONYMS: & = &;
/// Synonym mappings for plan `FileAction` enum values.
///
/// Each tuple is `(synonym, canonical_value)` where `synonym` is a non-canonical
/// value that unambiguously maps to `canonical_value`.
///
/// Canonical values: `"create"`, `"modify"`, `"delete"`
// NOTE(review): the declaration below reads `& = &`, which is not valid Rust; the
// slice element type and the table entries appear to have been lost. Given the
// tuple description above, the type is presumably `&[(&str, &str)]` — TODO confirm
// and restore the entries from the original source.
pub const FILE_ACTION_SYNONYMS: & = &;
/// Synonym mappings for plan `StepType` enum values.
///
/// Each tuple is `(synonym, canonical_value)` where `synonym` is a non-canonical
/// value that unambiguously maps to `canonical_value`.
///
/// Canonical values: `"file-change"`, `"action"`, `"research"`
// NOTE(review): the declaration below reads `& = &`, which is not valid Rust; the
// slice element type and the table entries appear to have been lost. Given the
// tuple description above, the type is presumably `&[(&str, &str)]` — TODO confirm
// and restore the entries from the original source.
pub const STEP_TYPE_SYNONYMS: & = &;
/// Synonym mappings for plan `Priority` enum values.
///
/// Each tuple is `(synonym, canonical_value)` where `synonym` is a non-canonical
/// value that unambiguously maps to `canonical_value`.
///
/// Canonical values: `"critical"`, `"high"`, `"medium"`, `"low"`
// NOTE(review): the declaration below reads `& = &`, which is not valid Rust; the
// slice element type and the table entries appear to have been lost. Given the
// tuple description above, the type is presumably `&[(&str, &str)]` — TODO confirm
// and restore the entries from the original source.
pub const PRIORITY_SYNONYMS: & = &;
/// Synonym mappings for plan `Severity` enum values.
///
/// Each tuple is `(synonym, canonical_value)` where `synonym` is a non-canonical
/// value that unambiguously maps to `canonical_value`.
///
/// Canonical values: `"low"`, `"medium"`, `"high"`, `"critical"`
///
/// Note: This uses the same values as `Priority` since the enum values overlap,
/// but is a separate table for clarity and independent extensibility.
// NOTE(review): the declaration below reads `& = &`, which is not valid Rust; the
// slice element type and the table entries appear to have been lost. Given the
// tuple description above, the type is presumably `&[(&str, &str)]` — TODO confirm
// and restore the entries from the original source.
pub const SEVERITY_SYNONYMS: & = &;
/// Synonym mappings for plan `ListType` enum values.
///
/// Each tuple is `(synonym, canonical_value)` where `synonym` is a non-canonical
/// value that unambiguously maps to `canonical_value`.
///
/// Canonical values: `"ordered"`, `"unordered"`
// NOTE(review): the declaration below reads `& = &`, which is not valid Rust; the
// slice element type and the table entries appear to have been lost. Given the
// tuple description above, the type is presumably `&[(&str, &str)]` — TODO confirm
// and restore the entries from the original source.
pub const LIST_TYPE_SYNONYMS: & = &;
/// Normalize an enum value to its canonical form.
///
/// This function performs tolerant matching for enum-like XML content values.
/// It accepts exact canonical values, case-insensitive variations of canonical
/// values, and configured synonym mappings.
///
/// # Arguments
///
/// * `value` - The raw value to normalize (may have whitespace, different casing)
/// * `valid_values` - The canonical valid values for this enum
/// * `synonyms` - Table of `(synonym, canonical_value)` pairs for tolerant matching
///
/// # Returns
///
/// * `Some(canonical_value)` if the input can be confidently mapped to a canonical value
/// * `None` if the input is ambiguous or unknown (caller should reject)
///
/// # Examples
///
/// Internal usage example:
///
/// ```rust,ignore
/// use ralph_workflow::files::llm_output_extraction::xml_helpers::tolerant_parsing::{
/// normalize_enum_value, DEVELOPMENT_STATUS_SYNONYMS,
/// };
///
/// let valid = &["completed", "partial", "failed"];
///
/// // Exact match
/// assert_eq!(
/// normalize_enum_value("completed", valid, DEVELOPMENT_STATUS_SYNONYMS),
/// Some("completed".to_string())
/// );
///
/// // Case-insensitive
/// assert_eq!(
/// normalize_enum_value("Completed", valid, DEVELOPMENT_STATUS_SYNONYMS),
/// Some("completed".to_string())
/// );
///
/// // Synonym
/// assert_eq!(
/// normalize_enum_value("done", valid, DEVELOPMENT_STATUS_SYNONYMS),
/// Some("completed".to_string())
/// );
///
/// // Unknown value
/// assert_eq!(
/// normalize_enum_value("banana", valid, DEVELOPMENT_STATUS_SYNONYMS),
/// None
/// );
/// ```
/// Compute the Levenshtein distance between two strings.
///
/// This is an inline implementation for short XML tag names (typically < 30 characters).
/// Uses dynamic programming with O(mn) time and O(min(m,n)) space.
/// Normalize a tag name to a known tag using fuzzy matching.
///
/// This function performs tolerant matching for XML element names that may contain
/// minor typos (e.g., `<ralph-sumary>` instead of `<ralph-summary>`).
///
/// # Arguments
///
/// * `tag` - The raw tag name to normalize (may have typos)
/// * `known_tags` - The list of known/valid tag names to match against
///
/// # Returns
///
/// * `Some(&str)` - A reference to the matching known tag if exactly one tag is within
/// the edit-distance threshold (currently 1)
/// * `None` - If zero tags are within threshold OR multiple tags are equally close (ambiguous)
///
/// # Examples
///
/// ```rust,ignore
/// use ralph_workflow::files::llm_output_extraction::xml_helpers::tolerant_parsing::normalize_tag_name;
///
/// let known = &["ralph-status", "ralph-summary", "ralph-files-changed"];
///
/// // Exact match
/// assert_eq!(normalize_tag_name("ralph-summary", known), Some("ralph-summary"));
///
/// // Single char deletion typo
/// assert_eq!(normalize_tag_name("ralph-sumary", known), Some("ralph-summary"));
///
/// // Single char insertion typo
/// assert_eq!(normalize_tag_name("ralph-ssummary", known), Some("ralph-summary"));
///
/// // Completely unknown tag
/// assert_eq!(normalize_tag_name("ralph-banana", known), None);
///
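/// // Ambiguous input (within the threshold of more than one known tag).
/// // Exact matches, as in the first example, are handled separately.
/// let similar = &["ralph-stat", "ralph-star"];
/// assert_eq!(normalize_tag_name("ralph-sta", similar), None);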
/// ```