anyrepair 0.2.4

A comprehensive Rust crate for repairing malformed structured data including JSON, YAML, XML, TOML, CSV, INI, Markdown, and Diff with format auto-detection
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
//! YAML repair functionality

use crate::error::Result;
use crate::traits::{Repair, RepairStrategy, Validator};
use regex::Regex;
use serde_yaml::Value;
use std::sync::OnceLock;

/// Cached regex patterns for YAML performance optimization
///
/// All patterns are anchored with `^…$` and are therefore meant to be matched
/// against one line at a time.
// `dead_code` is allowed because not every field has a reader; e.g. no strategy
// visible in this file reads `quoted_strings` (NOTE(review): confirm it is
// still needed).
#[allow(dead_code)]
struct YamlRegexCache {
    /// Matches `<indent><identifier><whitespace><rest>` where `<rest>` does not
    /// start with `:`; groups: 1 = indent, 2 = identifier, 3 = rest.
    missing_colons: Regex,
    /// Matches optional whitespace, a `-`, optional whitespace and then the
    /// remaining text of the line (group 1).
    list_items: Regex,
    /// Matches `<indent><identifier> : <value>` where the value neither starts
    /// nor ends with a single or double quote; groups: 1 = indent,
    /// 2 = identifier, 3 = value.
    quoted_strings: Regex,
}

impl YamlRegexCache {
    /// Compiles every cached pattern.
    ///
    /// # Errors
    ///
    /// Returns the converted regex error of the first pattern that fails to
    /// compile.
    fn new() -> Result<Self> {
        let missing_colons = Regex::new(r#"^(\s*)([a-zA-Z_][a-zA-Z0-9_]*)\s+([^:].*)$"#)?;
        let list_items = Regex::new(r#"^\s*-\s*(.+)$"#)?;
        let quoted_strings =
            Regex::new(r#"^(\s*)([a-zA-Z_][a-zA-Z0-9_]*)\s*:\s*([^'"].*[^'"])\s*$"#)?;

        Ok(Self {
            missing_colons,
            list_items,
            quoted_strings,
        })
    }
}

/// Process-wide slot holding the lazily built [`YamlRegexCache`].
static YAML_REGEX_CACHE: OnceLock<YamlRegexCache> = OnceLock::new();

/// Returns the shared regex cache, building it on the first call.
///
/// # Panics
///
/// Panics if [`YamlRegexCache::new`] returns an error during that first
/// initialisation.
fn get_yaml_regex_cache() -> &'static YamlRegexCache {
    let build = || YamlRegexCache::new().expect("Failed to initialize YAML regex cache");
    YAML_REGEX_CACHE.get_or_init(build)
}

/// YAML repairer that can fix common YAML issues
///
/// Uses trait-based composition with GenericRepairer for better modularity
pub struct YamlRepairer {
    /// Generic repairer built from the YAML validator and the YAML repair
    /// strategies; `repair` and `needs_repair` are forwarded to it.
    inner: crate::repairer_base::GenericRepairer,
}

impl YamlRepairer {
    /// Create a new YAML repairer
    ///
    /// Wires a [`YamlValidator`] and the seven YAML repair strategies defined
    /// in this module into a `GenericRepairer`.
    pub fn new() -> Self {
        let validator: Box<dyn Validator> = Box::new(YamlValidator);
        let strategies: Vec<Box<dyn RepairStrategy>> = vec![
            Box::new(FixIndentationStrategy),
            Box::new(AddMissingColonsStrategy),
            Box::new(FixListFormattingStrategy),
            Box::new(AddDocumentSeparatorStrategy),
            Box::new(FixQuotedStringsStrategy),
            Box::new(AdvancedIndentationStrategy),
            Box::new(ComplexStructureStrategy),
        ];

        Self {
            inner: crate::repairer_base::GenericRepairer::new(validator, strategies),
        }
    }
}

impl Default for YamlRepairer {
    fn default() -> Self {
        Self::new()
    }
}

impl Repair for YamlRepairer {
    /// Forwards the repair request to the wrapped `GenericRepairer`.
    fn repair(&mut self, content: &str) -> Result<String> {
        self.inner.repair(content)
    }

    /// Forwards the check to the wrapped `GenericRepairer`.
    fn needs_repair(&self, content: &str) -> bool {
        self.inner.needs_repair(content)
    }

    /// Estimates how likely `content` is to be YAML.
    ///
    /// Returns `1.0` when the validator accepts the content. Otherwise the
    /// score is the sum of the weights of the heuristics that fire, capped at
    /// `1.0`: a colon (0.3), consistent indentation (0.3), a dash (0.2), a
    /// `---` marker (0.1) and a quote character (0.1).
    fn confidence(&self, content: &str) -> f64 {
        if self.inner.validator().is_valid(content) {
            return 1.0;
        }

        // Indentation heuristic: once a line with non-zero indentation has been
        // seen, the next considered line must keep that indentation or add
        // exactly two columns. Blank lines and lines starting with `#` in
        // column 0 are not considered.
        let mut previous_width = 0;
        let mut saw_content = false;
        let mut indentation_consistent = true;

        let considered = content
            .lines()
            .filter(|line| !(line.trim().is_empty() || line.starts_with('#')));
        for line in considered {
            saw_content = true;

            let width = line.chars().take_while(|c| c.is_whitespace()).count();
            let acceptable =
                previous_width == 0 || width == previous_width || width == previous_width + 2;
            if !acceptable {
                indentation_consistent = false;
                break;
            }
            previous_width = width;
        }

        // (heuristic fired, weight) — kept in this order so the floating-point
        // sum is accumulated the same way every time.
        let signals: [(bool, f64); 5] = [
            (content.contains(':'), 0.3),
            (indentation_consistent && saw_content, 0.3),
            (content.contains('-'), 0.2),
            (content.contains("---"), 0.1),
            (content.contains('"') || content.contains('\''), 0.1),
        ];

        let mut score: f64 = 0.0;
        for &(fired, weight) in signals.iter() {
            if fired {
                score += weight;
            }
        }

        score.min(1.0_f64)
    }
}

/// YAML validator
///
/// Combines a line-based heuristic with a full `serde_yaml` parse.
pub struct YamlValidator;

impl Validator for YamlValidator {
    /// Returns `true` only if `content` is non-blank, no line looks like a
    /// key/value pair that lost its colon, and `serde_yaml` can parse it.
    fn is_valid(&self, content: &str) -> bool {
        if content.trim().is_empty() {
            return false;
        }

        // A line is suspicious when it has a space but no colon and does not
        // open a list item or a flow collection.
        let missing_colon = |text: &str| {
            let structural =
                text.starts_with('-') || text.starts_with('[') || text.starts_with('{');
            !structural && !text.contains(':') && text.contains(' ')
        };

        let has_suspicious_line = content
            .lines()
            .map(|line| line.trim())
            .filter(|text| !text.is_empty() && !text.starts_with('#'))
            .any(|text| missing_colon(text));
        if has_suspicious_line {
            return false;
        }

        serde_yaml::from_str::<Value>(content).is_ok()
    }

    /// Returns the `serde_yaml` parse error as a single message, or an empty
    /// list when parsing succeeds.
    fn validate(&self, content: &str) -> Vec<String> {
        serde_yaml::from_str::<Value>(content)
            .err()
            .into_iter()
            .map(|error| error.to_string())
            .collect()
    }
}

/// Strategy to fix indentation issues
///
/// Re-indents every non-blank line and, for lines that look like a key/value
/// pair without a colon, inserts the colon after the first word.
struct FixIndentationStrategy;

impl RepairStrategy for FixIndentationStrategy {
    /// Rewrites `content` line by line and joins the result with `\n`.
    ///
    /// Blank lines are copied unchanged. Every other line is trimmed and
    /// re-indented relative to the top of an indentation stack: list items and
    /// lines ending in `:` sit at that level, everything else two columns
    /// deeper.
    fn apply(&self, content: &str) -> Result<String> {
        let mut result = Vec::<String>::new();
        // Entries are only ever pushed, so the computed indentation never
        // decreases while walking the document.
        let mut indent_stack = vec![0];

        for line in content.lines() {
            let trimmed = line.trim();
            if trimmed.is_empty() {
                result.push(line.to_string());
                continue;
            }

            // Determine expected indentation based on context
            let base_indent = indent_stack.last().copied().unwrap_or(0);
            let expected_indent = if trimmed.starts_with('-') || trimmed.ends_with(':') {
                base_indent
            } else {
                base_indent + 2
            };

            // Only plain `key value` lines may receive a colon. List items,
            // flow collections and comments contain spaces without being
            // key/value pairs; rewriting them would turn `- a b` into
            // `-: a b`. This mirrors the line shapes `YamlValidator` exempts.
            let may_take_colon =
                !matches!(trimmed.chars().next(), Some('-' | '[' | '{' | '#'));
            let fixed_trimmed = if may_take_colon && !trimmed.contains(':') && trimmed.contains(' ')
            {
                trimmed.replacen(' ', ": ", 1)
            } else {
                trimmed.to_string()
            };

            // Fix indentation
            result.push(format!("{}{}", " ".repeat(expected_indent), fixed_trimmed));

            // Lines that open a nested level make the following lines deeper.
            if fixed_trimmed.ends_with(':') || fixed_trimmed.starts_with('-') {
                indent_stack.push(expected_indent + 2);
            }
        }

        Ok(result.join("\n"))
    }

    fn priority(&self) -> u8 {
        5
    }

    fn name(&self) -> &str {
        "FixIndentationStrategy"
    }
}

/// Strategy to add missing colons
///
/// Turns `key value` lines into `key: value` using the cached
/// `missing_colons` pattern.
struct AddMissingColonsStrategy;

impl RepairStrategy for AddMissingColonsStrategy {
    /// Applies the pattern to each line separately and joins the lines with
    /// `\n`; lines that do not match are copied unchanged.
    fn apply(&self, content: &str) -> Result<String> {
        let pattern = &get_yaml_regex_cache().missing_colons;

        // `replace` hands back the input untouched when the pattern does not
        // match, so no separate match test is required.
        let repaired: Vec<String> = content
            .lines()
            .map(|line| pattern.replace(line, "$1$2: $3").into_owned())
            .collect();

        Ok(repaired.join("\n"))
    }

    fn priority(&self) -> u8 {
        4
    }

    fn name(&self) -> &str {
        "AddMissingColonsStrategy"
    }
}

/// Strategy to fix list formatting
///
/// Normalises list items so that exactly one space separates the dash from
/// the item text (`-item` and `-   item` both become `- item`).
struct FixListFormattingStrategy;

impl RepairStrategy for FixListFormattingStrategy {
    /// Rewrites each list-item line and joins the lines with `\n`.
    ///
    /// The item's leading indentation is kept, because dropping it would move
    /// nested items to the top level. Lines whose text starts with `---` are
    /// left alone so document markers are not turned into `- --`.
    fn apply(&self, content: &str) -> Result<String> {
        let pattern = &get_yaml_regex_cache().list_items;

        let repaired: Vec<String> = content
            .lines()
            .map(|line| match pattern.captures(line) {
                Some(caps) if !line.trim_start().starts_with("---") => {
                    // The pattern only allows whitespace before the first
                    // dash, so everything before it is the indentation.
                    let dash_at = line.find('-').unwrap_or(0);
                    format!("{}- {}", &line[..dash_at], &caps[1])
                }
                _ => line.to_string(),
            })
            .collect();

        Ok(repaired.join("\n"))
    }

    fn priority(&self) -> u8 {
        3
    }

    fn name(&self) -> &str {
        "FixListFormattingStrategy"
    }
}

/// Strategy to add document separator
///
/// Ensures the document begins with a `---` marker.
struct AddDocumentSeparatorStrategy;

impl RepairStrategy for AddDocumentSeparatorStrategy {
    /// Trims surrounding whitespace from `content` and prefixes `---` plus a
    /// newline unless the trimmed text already starts with `---`.
    fn apply(&self, content: &str) -> Result<String> {
        let body = content.trim();
        let document = if body.starts_with("---") {
            body.to_string()
        } else {
            format!("---\n{}", body)
        };
        Ok(document)
    }

    fn priority(&self) -> u8 {
        2
    }

    fn name(&self) -> &str {
        "AddDocumentSeparatorStrategy"
    }
}

/// Strategy to fix quoted strings
struct FixQuotedStringsStrategy;

impl RepairStrategy for FixQuotedStringsStrategy {
    fn apply(&self, content: &str) -> Result<String> {
        // Convert single quotes to double quotes
        let single_quote_re = Regex::new(r"'([^']*)'")?;
        let result = single_quote_re.replace_all(content, r#""$1""#);
        Ok(result.to_string())
    }

    fn priority(&self) -> u8 {
        1
    }

    fn name(&self) -> &str {
        "FixQuotedStringsStrategy"
    }
}

/// Strategy for advanced indentation detection and fixing
///
/// Tracks a single "current" indentation level and re-indents list items and
/// key/value lines relative to it.
struct AdvancedIndentationStrategy;

impl RepairStrategy for AdvancedIndentationStrategy {
    /// Walks `content` line by line and joins the result with `\n`.
    ///
    /// * Blank lines and lines starting with `#` in column 0 are copied.
    /// * A list item is expected two columns deeper than the tracked level;
    ///   the tracked level then moves to that depth.
    /// * A line containing `:` is expected at the tracked level, which stays
    ///   unchanged.
    /// * Any other line is copied and its own indentation becomes the tracked
    ///   level.
    fn apply(&self, content: &str) -> Result<String> {
        let mut output: Vec<String> = Vec::new();
        let mut tracked_indent = 0;

        for line in content.lines() {
            let body = line.trim();
            if body.is_empty() || line.starts_with('#') {
                output.push(line.to_string());
                continue;
            }

            let actual_indent = line.chars().take_while(|c| c.is_whitespace()).count();

            if let Some(item) = body.strip_prefix('-') {
                // List items should be indented 2 spaces more than their parent
                let wanted = tracked_indent + 2;
                output.push(if actual_indent == wanted {
                    line.to_string()
                } else {
                    format!("{}- {}", " ".repeat(wanted), item.trim())
                });
                tracked_indent = wanted;
            } else if body.contains(':') {
                // Key-value pairs stay on the tracked level.
                output.push(if actual_indent == tracked_indent {
                    line.to_string()
                } else {
                    format!("{}{}", " ".repeat(tracked_indent), body)
                });
            } else {
                // Other content - maintain relative indentation
                output.push(line.to_string());
                tracked_indent = actual_indent;
            }
        }

        Ok(output.join("\n"))
    }

    fn priority(&self) -> u8 {
        6
    }

    fn name(&self) -> &str {
        "AdvancedIndentationStrategy"
    }
}

/// Strategy for handling complex nested structures
///
/// Normalises the spacing around the first colon of `key: value` lines and of
/// `- key: value` list items while leaving block-scalar content, comments and
/// document markers untouched.
struct ComplexStructureStrategy;

impl RepairStrategy for ComplexStructureStrategy {
    /// Rewrites `content` line by line and joins the result with `\n`.
    ///
    /// Rewritten lines keep their original leading whitespace, so nesting is
    /// preserved, and a list item keeps exactly one dash.
    fn apply(&self, content: &str) -> Result<String> {
        /// Splits `text` at its first colon into trimmed `(key, value)` parts.
        /// Yields `None` when there is no colon, when the key would be empty,
        /// or when the colon is followed by `//` (as in `scheme://…`).
        fn split_key_value(text: &str) -> Option<(&str, &str)> {
            let (key, value) = text.split_once(':')?;
            let (key, value) = (key.trim(), value.trim());
            if key.is_empty() || value.starts_with("//") {
                return None;
            }
            Some((key, value))
        }

        /// Reports whether `value` is a block-scalar indicator (`|` or `>`)
        /// followed only by chomping/indentation indicator characters.
        fn is_block_scalar_header(value: &str) -> bool {
            let mut chars = value.chars();
            matches!(chars.next(), Some('|' | '>'))
                && chars.all(|c| matches!(c, '+' | '-' | '0'..='9'))
        }

        let mut result: Vec<String> = Vec::new();
        // Indentation of the line that opened the block scalar whose content
        // is currently being passed through, if any.
        let mut block_scalar_indent: Option<usize> = None;

        for line in content.lines() {
            let trimmed = line.trim();
            if trimmed.is_empty() || trimmed.starts_with('#') {
                result.push(line.to_string());
                continue;
            }

            let indent = &line[..line.len() - line.trim_start().len()];
            let indent_width = indent.chars().count();

            // Handle multiline strings whose indicator stands on its own line
            if trimmed.starts_with('|') || trimmed.starts_with('>') {
                block_scalar_indent = Some(indent_width);
                result.push(line.to_string());
                continue;
            }

            if let Some(opening_indent) = block_scalar_indent {
                if indent_width > opening_indent {
                    // Still inside the scalar: the text is data, not structure.
                    result.push(line.to_string());
                    continue;
                }
                block_scalar_indent = None;
            }

            // Document markers start with dashes but are not list items.
            if trimmed.starts_with("---") {
                result.push(line.to_string());
                continue;
            }

            // Decide whether the line is rewritable and with which prefix.
            // Parent keys (`key:`) outside list items carry no value and are
            // kept as written.
            let rewrite = if let Some(rest) = trimmed.strip_prefix('-') {
                // The dash is removed before splitting so that it is not
                // duplicated when the item is rebuilt.
                split_key_value(rest).map(|(key, value)| ("- ", key, value))
            } else if !trimmed.ends_with(':') {
                split_key_value(trimmed).map(|(key, value)| ("", key, value))
            } else {
                None
            };

            match rewrite {
                Some((marker, key, value)) => {
                    if is_block_scalar_header(value) {
                        // `key: |` / `key: >` — protect the following content.
                        block_scalar_indent = Some(indent_width);
                    }
                    let rebuilt = if value.is_empty() {
                        format!("{}{}{}:", indent, marker, key)
                    } else {
                        format!("{}{}{}: {}", indent, marker, key, value)
                    };
                    result.push(rebuilt);
                }
                None => result.push(line.to_string()),
            }
        }

        Ok(result.join("\n"))
    }

    fn priority(&self) -> u8 {
        5
    }

    fn name(&self) -> &str {
        "ComplexStructureStrategy"
    }
}