Skip to main content

formatparse_core/parser/
pattern.rs

1//! Parse format patterns into regex strings and field specifications (pure Rust).
2
3use crate::error::FormatParseError;
4use crate::types::definitions::{FieldSpec, FieldType};
5use regex;
6use std::collections::HashMap;
7
8/// Maximum recursive depth when compiling nested format patterns (GitHub issue #12).
9pub const MAX_NESTED_FORMAT_DEPTH: usize = 10;
10
11/// Maximum brace nesting **within** one field's format specification (safety cap).
12const MAX_BRACE_DEPTH_IN_FORMAT_SPEC: i32 = 10;
13
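/// Result tuple from [`parse_pattern`], in order: the anchored regex string (`^…$`), the same
/// regex string without anchors (the search regex), the field specs, the original field
/// names, the normalized field names, the normalized-to-original name map, and whether `""`
/// may match — a flag that stays `true` only when the pattern contains no literal text and
/// every field is a default unconstrained string.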
17pub type ParsedPatternParts = (
18    String,
19    String,
20    Vec<FieldSpec>,
21    Vec<Option<String>>,
22    Vec<Option<String>>,
23    HashMap<String, String>,
24    bool,
25);
26
/// Reports whether `s` contains any character that is not whitespace.
///
/// This is the same question as "is `s.trim()` non-empty?": a literal run made up purely of
/// whitespace (or an empty run) yields `false`.
fn literal_delimits_empty_field(s: &str) -> bool {
    s.chars().any(|c| !c.is_whitespace())
}
31
32/// Collect the format-spec substring after `:` until the matching `}` that closes this
33/// field, honoring nested `{`…`}` and doubled `{{` / `}}` escapes (formatparse#12).
34fn collect_balanced_format_spec(
35    chars: &mut std::iter::Peekable<std::str::Chars>,
36) -> Result<String, FormatParseError> {
37    let mut out = String::new();
38    let mut depth = 0i32;
39    loop {
40        let Some(&ch) = chars.peek() else {
41            return Err(FormatParseError::PatternError(
42                "Unclosed '{' in pattern: expected '}' to close the field".to_string(),
43            ));
44        };
45        if ch == '}' && depth == 0 {
46            break;
47        }
48        let c = chars
49            .next()
50            .expect("peek matched a char so next() must succeed");
51        match c {
52            '{' => {
53                if chars.peek() == Some(&'{') {
54                    chars.next();
55                    out.push('{');
56                    out.push('{');
57                } else {
58                    depth += 1;
59                    if depth > MAX_BRACE_DEPTH_IN_FORMAT_SPEC {
60                        return Err(FormatParseError::PatternError(
61                            "Format specification has too many nested '{' (max 10)".to_string(),
62                        ));
63                    }
64                    out.push('{');
65                }
66            }
67            // A lone `}` closes one `{…}` nesting level inside the spec. Do **not** merge two
68            // consecutive `}` into the `}}` escape here: in `{outer:{inner:d}}` the first `}`
69            // closes the inner field and the second closes the outer field (formatparse#12).
70            '}' => {
71                depth -= 1;
72                if depth < 0 {
73                    return Err(FormatParseError::PatternError(
74                        "Unexpected '}' in format specification".to_string(),
75                    ));
76                }
77                out.push('}');
78            }
79            _ => out.push(c),
80        }
81    }
82    Ok(out)
83}
84
/// Checks that the braces in `s` are balanced: every `}` has an earlier unmatched `{`, and no
/// `{` is left open at the end.
///
/// A doubled `{{` is treated as an escape and skipped without opening a level. A doubled
/// `}}` gets no special treatment; each `}` closes one level.
fn brace_balance_valid_for_nested_candidate(s: &str) -> bool {
    let mut open_levels: i32 = 0;
    let mut rest = s.chars().peekable();
    loop {
        match rest.next() {
            None => return open_levels == 0,
            Some('{') => {
                // Swallow the second brace of a `{{` escape; otherwise open a level.
                if rest.next_if_eq(&'{').is_none() {
                    open_levels += 1;
                }
            }
            Some('}') => {
                if open_levels == 0 {
                    // Closing brace without a matching opener.
                    return false;
                }
                open_levels -= 1;
            }
            Some(_) => {}
        }
    }
}
108
109/// True when `trimmed` should be compiled as a nested brace pattern (not a classic
110/// ``[[fill]align]…[type]]`` format spec).
111fn is_nested_format_spec_candidate(trimmed: &str) -> bool {
112    if trimmed.len() < 2 {
113        return false;
114    }
115    if !trimmed.starts_with('{') || trimmed.starts_with("{{") {
116        return false;
117    }
118    if !trimmed.ends_with('}') {
119        return false;
120    }
121    brace_balance_valid_for_nested_candidate(trimmed)
122}
123
/// Returns `anchored` without one leading ``^`` and one trailing ``$``, when present.
///
/// At most a single character is removed from each end; everything in between is copied
/// unchanged.
fn strip_regex_anchors(anchored: &str) -> String {
    let body = match anchored.strip_prefix('^') {
        Some(rest) => rest,
        None => anchored,
    };
    match body.strip_suffix('$') {
        Some(inner) => inner.to_owned(),
        None => body.to_owned(),
    }
}
130
131/// After [`parse_field`], `chars` is at optional whitespace then the closing `}`.
132/// True when there is a non-whitespace literal run after that `}` and before the next unescaped `{`
133/// or end of pattern (formatparse#83). Whitespace-only gaps do not count so ``{} {}`` keeps
134/// non-empty captures for both fields.
135fn has_trailing_literal_before_next_field(mut chars: std::iter::Peekable<std::str::Chars>) -> bool {
136    while chars.peek().is_some_and(|c| c.is_whitespace()) {
137        chars.next();
138    }
139    if chars.next() != Some('}') {
140        return false;
141    }
142    while chars.peek().is_some_and(|c| c.is_whitespace()) {
143        chars.next();
144    }
145    let mut literal = String::new();
146    loop {
147        match chars.next() {
148            None => return literal_delimits_empty_field(&literal),
149            Some('{') => {
150                if chars.peek() == Some(&'{') {
151                    chars.next();
152                    literal.push('{');
153                } else {
154                    return literal_delimits_empty_field(&literal);
155                }
156            }
157            Some('}') => {
158                if chars.peek() == Some(&'}') {
159                    chars.next();
160                    literal.push('}');
161                } else {
162                    literal.push('}');
163                }
164            }
165            Some(c) => literal.push(c),
166        }
167    }
168}
169
170/// Parse a format pattern string into regex parts, field specs, and names
171/// `allow_empty_delimited_default_string`: when false, default string fields always use `.+?`
172/// (used for the unanchored search regex so search/findall do not stop early).
173pub fn parse_pattern(
174    pattern: &str,
175    custom_patterns: &HashMap<String, String>,
176    allow_empty_delimited_default_string: bool,
177    nesting_depth: usize,
178) -> Result<ParsedPatternParts, FormatParseError> {
179    // Pre-allocate with estimated capacity based on pattern length
180    let estimated_fields = pattern.matches('{').count();
181    let mut regex_parts = Vec::with_capacity(estimated_fields * 2);
182    let mut field_specs = Vec::with_capacity(estimated_fields);
183    let mut field_names = Vec::with_capacity(estimated_fields); // Original names
184    let mut normalized_names = Vec::with_capacity(estimated_fields); // Normalized for regex
185    let mut name_mapping = HashMap::with_capacity(estimated_fields); // normalized -> original
186    let mut field_name_types = HashMap::with_capacity(estimated_fields); // Track field name -> FieldType for validation
187    let mut chars: std::iter::Peekable<std::str::Chars> = pattern.chars().peekable();
188    let mut literal = String::new();
189    let mut allows_empty_default_string_match = true;
190
191    while let Some(ch) = chars.next() {
192        match ch {
193            '{' => {
194                // Check for escaped brace
195                if chars.peek() == Some(&'{') {
196                    chars.next();
197                    literal.push('{');
198                    continue;
199                }
200
201                let had_leading_literal = !literal.trim().is_empty();
202
203                // Flush literal part
204                if !literal.is_empty() {
205                    allows_empty_default_string_match = false;
206                    // If literal ends with whitespace, make it flexible to allow multiple spaces
207                    // But use \s+ (one or more) instead of \s* (zero or more) to ensure we consume the space
208                    let escaped = if literal.trim_end() != literal {
209                        // Literal ends with whitespace - replace trailing whitespace with \s+
210                        // to allow one or more spaces (ensures we consume at least one space)
211                        let trimmed = literal.trim_end();
212                        let mut escaped_str = String::with_capacity(trimmed.len() + 4);
213                        escaped_str.push_str(&regex::escape(trimmed));
214                        escaped_str.push_str("\\s+");
215                        escaped_str
216                    } else {
217                        regex::escape(&literal)
218                    };
219                    regex_parts.push(escaped);
220                    literal.clear();
221                }
222
223                // Parse field specification
224                let (mut spec, name) = parse_field(&mut chars, nesting_depth)?;
225
226                if matches!(spec.field_type, FieldType::Nested) {
227                    if nesting_depth >= MAX_NESTED_FORMAT_DEPTH {
228                        return Err(FormatParseError::PatternError(
229                            "Nested format patterns exceed max depth (10)".to_string(),
230                        ));
231                    }
232                    let inner = spec.nested_subpattern.as_ref().ok_or_else(|| {
233                        FormatParseError::PatternError(
234                            "Internal error: nested field missing subpattern".to_string(),
235                        )
236                    })?;
237                    let (inner_anchored, _, _, _, _, _, _) = parse_pattern(
238                        inner,
239                        custom_patterns,
240                        allow_empty_delimited_default_string,
241                        nesting_depth + 1,
242                    )?;
243                    spec.nested_regex_body = Some(strip_regex_anchors(&inner_anchored));
244                }
245
246                if !spec.is_default_unconstrained_string() {
247                    allows_empty_default_string_match = false;
248                }
249
250                let has_trailing_literal = has_trailing_literal_before_next_field(chars.clone());
251
252                // Check if the next field (if any) is empty {} (non-greedy)
253                // This affects width-only string patterns: exact when followed by {}, greedy otherwise
254                let mut peek_chars = chars.clone();
255                let next_field_is_greedy = loop {
256                    // Skip whitespace and consume the expected closing '}'
257                    let mut found_closing = false;
258                    while let Some(&ch) = peek_chars.peek() {
259                        if ch.is_whitespace() {
260                            peek_chars.next();
261                        } else if ch == '}' {
262                            peek_chars.next(); // Consume the closing brace
263                            found_closing = true;
264                            break;
265                        } else {
266                            break;
267                        }
268                    }
269                    if !found_closing {
270                        break None; // No more fields
271                    }
272                    // Skip any whitespace after the closing brace
273                    while let Some(&ch) = peek_chars.peek() {
274                        if ch.is_whitespace() {
275                            peek_chars.next();
276                        } else {
277                            break;
278                        }
279                    }
280                    // Check for opening brace (indicating another field)
281                    if peek_chars.peek() == Some(&'{') {
282                        peek_chars.next();
283                        // Check if it's escaped
284                        if peek_chars.peek() == Some(&'{') {
285                            peek_chars.next();
286                            continue; // Escaped brace, continue
287                        }
288                        // Found a field - check if it's empty {} or has precision
289                        if peek_chars.peek() == Some(&'}') {
290                            // Empty field {} - non-greedy, use exact width
291                            break Some(false);
292                        } else {
293                            // Check if the field has precision (like {:.4})
294                            let mut field_chars = peek_chars.clone();
295                            let mut has_precision = false;
296                            while let Some(&ch) = field_chars.peek() {
297                                if ch == '}' {
298                                    break;
299                                }
300                                if ch == ':' {
301                                    field_chars.next();
302                                    // Check for precision after colon
303                                    while let Some(&next_ch) = field_chars.peek() {
304                                        if next_ch == '}' {
305                                            break;
306                                        }
307                                        if next_ch == '.' {
308                                            has_precision = true;
309                                            break;
310                                        }
311                                        field_chars.next();
312                                    }
313                                    break;
314                                }
315                                field_chars.next();
316                            }
317                            // If next field has precision, it's greedy (so current should be greedy too)
318                            // If next field is empty {}, it's non-greedy (so current should be exact)
319                            break Some(has_precision);
320                        }
321                    } else {
322                        // No more fields - use greedy
323                        break None;
324                    }
325                };
326
327                let allow_empty_delimited = allow_empty_delimited_default_string
328                    && spec.is_default_unconstrained_string()
329                    && (had_leading_literal || has_trailing_literal);
330                let pattern = spec.to_regex_pattern(
331                    custom_patterns,
332                    next_field_is_greedy,
333                    allow_empty_delimited,
334                );
335                let la_raw = spec.regex_lookahead.as_deref().unwrap_or("");
336                let (lb_prefix, body, la_emit) =
337                    crate::rewrite_field_fragments_for_engine_anchor(&pattern, la_raw);
338
339                // Validate repeated field names have same type
340                if let Some(ref original_name) = name {
341                    if let Some(existing_type) = field_name_types.get(original_name) {
342                        // Check if types match
343                        if !field_types_match(existing_type, &spec.field_type) {
344                            return Err(FormatParseError::RepeatedNameError(original_name.clone()));
345                        }
346                    } else {
347                        field_name_types.insert(original_name.clone(), spec.field_type.clone());
348                    }
349                }
350
351                // Issue #15 / parse#146: `{name:brace}` — capture text inside `{`…`}` in the input
352                // (non-greedy `.*?`; later pattern literals may force a later `}`). Supports empty `{}`.
353                // Requires a non-numbered name.
354                let group_pattern = if matches!(spec.field_type, FieldType::BracedContent) {
355                    let Some(ref original_name) = name else {
356                        return Err(FormatParseError::PatternError(
357                            "The :brace format requires a named field (e.g. {content:brace})"
358                                .to_string(),
359                        ));
360                    };
361                    if original_name.chars().all(|c| c.is_ascii_digit()) {
362                        return Err(FormatParseError::PatternError(
363                            "The :brace format cannot be used with numbered fields".to_string(),
364                        ));
365                    }
366                    let normalized =
367                        normalize_field_name(original_name, &mut name_mapping, &normalized_names);
368                    format!("\\{{(?P<{}>.*?)\\}}", normalized)
369                } else if let Some(ref original_name) = name {
370                    // Check if field name is numeric (numbered field like {0}, {1}) - these should be positional
371                    let is_numeric = original_name.chars().all(|c| c.is_ascii_digit());
372
373                    if is_numeric {
374                        // Numbered fields are positional (unnamed groups), not named groups
375                        format!("{}{}({}){}", lb_prefix, "", body, la_emit)
376                    } else {
377                        // Normalize name: replace hyphens/dots with underscores, handle collisions
378                        let normalized = normalize_field_name(
379                            original_name,
380                            &mut name_mapping,
381                            &normalized_names,
382                        );
383                        format!("{}{}(?P<{}>{}){}", lb_prefix, "", normalized, body, la_emit)
384                    }
385                } else {
386                    format!("{}{}({}){}", lb_prefix, "", body, la_emit)
387                };
388
389                regex_parts.push(group_pattern);
390
391                // Handle name normalization for regex groups
392                if let Some(ref original_name) = name {
393                    // Check if field name is numeric (numbered field like {0}, {1}) - these should be positional
394                    let is_numeric = original_name.chars().all(|c| c.is_ascii_digit());
395
396                    if is_numeric {
397                        field_names.push(None); // Store as None (positional)
398                        normalized_names.push(None);
399                    } else {
400                        let normalized = normalize_field_name(
401                            original_name,
402                            &mut name_mapping,
403                            &normalized_names,
404                        );
405                        field_names.push(Some(original_name.clone())); // Store original
406                        normalized_names.push(Some(normalized.clone())); // Store normalized
407                        name_mapping.insert(normalized, original_name.clone()); // Map normalized -> original
408                    }
409                } else {
410                    field_names.push(None);
411                    normalized_names.push(None);
412                }
413                field_specs.push(spec);
414
415                // Expect closing brace
416                if chars.next() != Some('}') {
417                    return Err(FormatParseError::PatternError(
418                        "Expected '}' after field specification".to_string(),
419                    ));
420                }
421            }
422            '}' => {
423                // Check for escaped brace
424                if chars.peek() == Some(&'}') {
425                    chars.next();
426                    literal.push('}');
427                    continue;
428                }
429                literal.push('}');
430            }
431            _ => {
432                literal.push(ch);
433            }
434        }
435    }
436
437    // Flush remaining literal
438    if !literal.is_empty() {
439        allows_empty_default_string_match = false;
440        // If literal ends with whitespace, make it flexible to allow multiple spaces
441        let escaped = if literal.trim_end() != literal {
442            // Literal ends with whitespace - replace trailing whitespace with \s*
443            // to allow zero or more spaces (maintains compatibility with exact matches)
444            let trimmed = literal.trim_end();
445            format!("{}\\s*", regex::escape(trimmed))
446        } else {
447            regex::escape(&literal)
448        };
449        regex_parts.push(escaped);
450    }
451
452    let regex_str = regex_parts.join("");
453    let regex_str_with_anchors = format!("^{}$", regex_str);
454    Ok((
455        regex_str_with_anchors,
456        regex_str,
457        field_specs,
458        field_names,
459        normalized_names,
460        name_mapping,
461        allows_empty_default_string_match,
462    ))
463}
464
/// Produces the identifier used for `name` inside a `(?P<name>...)` capture group.
///
/// Hyphens, dots and `[` each become `_` (legacy parse compatibility); `]` is dropped so
/// dict-style paths do not gain a trailing separator (`hello[world]` → `hello_world`). `[`
/// and `]` are not valid in Rust/fancy-regex capture group identifiers.
///
/// If the result is already present in `existing_normalized`, extra underscores are added
/// until it is unique: at the position of the first underscore when there is one
/// (`a_b` → `a__b` → `a___b`), otherwise appended to the end (`name` → `name_`).
///
/// `_name_mapping` is not read or modified here.
pub fn normalize_field_name(
    name: &str,
    _name_mapping: &mut HashMap<String, String>,
    existing_normalized: &[Option<String>],
) -> String {
    let base: String = name
        .chars()
        .filter_map(|c| match c {
            '-' | '.' | '[' => Some('_'),
            ']' => None,
            other => Some(other),
        })
        .collect();

    let is_taken = |candidate: &str| {
        existing_normalized
            .iter()
            .flatten()
            .any(|taken| taken == candidate)
    };

    // Text on either side of the first underscore; extra underscores are inserted between.
    let around_first_underscore = base.find('_').map(|pos| (&base[..pos], &base[pos + 1..]));

    let mut candidate = base.clone();
    let mut extra_underscores = 0usize;
    while is_taken(&candidate) {
        extra_underscores += 1;
        candidate = match around_first_underscore {
            // One underscore from the base name plus one per collision so far.
            Some((head, tail)) => {
                format!("{}{}{}", head, "_".repeat(1 + extra_underscores), tail)
            }
            None => format!("{}{}", base, "_".repeat(extra_underscores)),
        };
    }
    candidate
}
513
514/// Reject `:ml` / `:blk` combined with numeric-only format specifiers (GitHub issues #8, #69, #70).
515///
516/// Width, precision, alignment, and fill are supported for multiline and indent-block fields;
517/// ``sign``, ``zero_pad``, and ``=`` alignment remain unsupported.
518pub fn validate_multiline_mvp(spec: &FieldSpec) -> Result<(), FormatParseError> {
519    if !matches!(
520        spec.field_type,
521        FieldType::Multiline | FieldType::IndentBlock
522    ) {
523        return Ok(());
524    }
525    if spec.sign.is_some() || spec.zero_pad {
526        return Err(FormatParseError::PatternError(
527            "Multiline types :ml and :blk do not support sign or zero-padding".to_string(),
528        ));
529    }
530    if spec.alignment == Some('=') {
531        return Err(FormatParseError::PatternError(
532            "Multiline types :ml and :blk do not support '=' alignment".to_string(),
533        ));
534    }
535    Ok(())
536}
537
538/// Check if two field types match (for repeated name validation)
539pub fn field_types_match(t1: &FieldType, t2: &FieldType) -> bool {
540    use std::mem::discriminant;
541    discriminant(t1) == discriminant(t2)
542}
543
/// Splits a dict-style field name into its path segments, e.g. `"hello[world]"` becomes
/// `["hello", "world"]`.
///
/// `[` always ends the current segment and opens a bracket; `]` ends the segment only while
/// a bracket is open and is otherwise kept as an ordinary character. Empty segments are
/// never emitted, so `"a[]"` yields just `["a"]`.
pub fn parse_field_path(field_name: &str) -> Vec<String> {
    /// Moves a non-empty pending segment onto the path, leaving the buffer empty.
    fn flush(pending: &mut String, segments: &mut Vec<String>) {
        if !pending.is_empty() {
            segments.push(std::mem::take(pending));
        }
    }

    let mut segments = Vec::new();
    let mut pending = String::new();
    let mut bracket_open = false;

    for ch in field_name.chars() {
        if ch == '[' {
            flush(&mut pending, &mut segments);
            bracket_open = true;
        } else if ch == ']' && bracket_open {
            flush(&mut pending, &mut segments);
            bracket_open = false;
        } else {
            pending.push(ch);
        }
    }

    flush(&mut pending, &mut segments);
    segments
}
582
583/// Parse a single field specification from the pattern
584pub fn parse_field(
585    chars: &mut std::iter::Peekable<std::str::Chars>,
586    nesting_depth: usize,
587) -> Result<(FieldSpec, Option<String>), FormatParseError> {
588    let mut spec = FieldSpec::new();
589    let mut field_name = String::new();
590    let mut in_name = true;
591
592    // Parse field name (before colon or conversion)
593    let mut in_brackets = false;
594    while let Some(&ch) = chars.peek() {
595        match ch {
596            ':' => {
597                chars.next();
598                in_name = false;
599                break;
600            }
601            '!' => {
602                chars.next();
603                // Conversion specifier (s, r, a) - skip for now
604                if chars.peek().is_some() {
605                    chars.next();
606                }
607                in_name = false;
608            }
609            '}' => {
610                break;
611            }
612            '[' => {
613                in_brackets = true;
614                field_name.push(ch);
615                chars.next();
616            }
617            ']' => {
618                in_brackets = false;
619                field_name.push(ch);
620                chars.next();
621            }
622            '\'' | '"' => {
623                // Quote characters in field names indicate quoted keys (not supported)
624                if in_brackets {
625                    return Err(FormatParseError::NotImplementedError(
626                        "Quoted keys in field names".to_string(),
627                    ));
628                }
629                // Not in brackets, not a valid name character
630                in_name = false;
631                break;
632            }
633            _ => {
634                // Allow alphanumeric, underscore, hyphen, dot for field names
635                if ch.is_alphanumeric() || ch == '_' || ch == '-' || ch == '.' {
636                    field_name.push(ch);
637                    chars.next();
638                } else {
639                    // Not a valid name character, might be format spec
640                    in_name = false;
641                    break;
642                }
643            }
644        }
645    }
646
647    // Parse format spec (after colon until closing `}` that ends this field)
648    if !in_name {
649        let format_spec = collect_balanced_format_spec(chars)?;
650        let trimmed = format_spec.trim();
651        if is_nested_format_spec_candidate(trimmed) {
652            if nesting_depth >= MAX_NESTED_FORMAT_DEPTH {
653                return Err(FormatParseError::PatternError(
654                    "Nested format patterns exceed max depth (10)".to_string(),
655                ));
656            }
657            spec.field_type = FieldType::Nested;
658            spec.nested_subpattern = Some(trimmed.to_string());
659        } else {
660            parse_format_spec(&format_spec, &mut spec)?;
661        }
662        validate_multiline_mvp(&spec)?;
663    }
664
665    let name = if field_name.is_empty() {
666        None
667    } else {
668        Some(field_name)
669    };
670
671    Ok((spec, name))
672}
673
674/// Parse format specifier string into FieldSpec
675pub fn parse_format_spec(format_spec: &str, spec: &mut FieldSpec) -> Result<(), FormatParseError> {
676    // Format spec: [[fill]align][sign][#][0][width][,][.precision][type]
677    // Examples: "<10", ">", "^5.2f", "+d", "03d", ".2f"
678
679    let mut chars = format_spec.chars().peekable();
680
681    // Parse fill and align (optional)
682    // align can be: '<', '>', '^', '='
683    if let Some(&ch) = chars.peek() {
684        if ch == '<' || ch == '>' || ch == '^' || ch == '=' {
685            spec.alignment = Some(ch);
686            chars.next();
687        } else {
688            // Check if we have fill + align (e.g., "x<")
689            let mut peek_iter = chars.clone();
690            peek_iter.next(); // skip first char
691            if let Some(next_ch) = peek_iter.next() {
692                if next_ch == '<' || next_ch == '>' || next_ch == '^' || next_ch == '=' {
693                    spec.fill = Some(ch);
694                    chars.next(); // consume fill
695                    spec.alignment = Some(next_ch);
696                    chars.next(); // consume align
697                }
698            }
699        }
700    }
701
702    // Parse sign (optional): '+', '-', ' '
703    if let Some(&ch) = chars.peek() {
704        if ch == '+' || ch == '-' || ch == ' ' {
705            spec.sign = Some(ch);
706            chars.next();
707        }
708    }
709
710    // Parse # (alternate form) - skip for now
711    if chars.peek() == Some(&'#') {
712        chars.next();
713    }
714
715    // Parse 0 (zero padding)
716    if chars.peek() == Some(&'0') {
717        spec.zero_pad = true;
718        chars.next();
719    }
720
721    // Parse width (digits)
722    let mut width_str = String::new();
723    while let Some(&ch) = chars.peek() {
724        if ch.is_ascii_digit() {
725            width_str.push(ch);
726            chars.next();
727        } else {
728            break;
729        }
730    }
731    if !width_str.is_empty() {
732        spec.width = width_str.parse::<usize>().ok();
733    }
734
735    // Parse comma (thousands separator) - skip for now
736    if chars.peek() == Some(&',') {
737        chars.next();
738    }
739
740    // Parse precision (.digits)
741    if chars.peek() == Some(&'.') {
742        chars.next();
743        let mut precision_str = String::new();
744        while let Some(&ch) = chars.peek() {
745            if ch.is_ascii_digit() {
746                precision_str.push(ch);
747                chars.next();
748            } else {
749                break;
750            }
751        }
752        if !precision_str.is_empty() {
753            spec.precision = precision_str.parse::<usize>().ok();
754        }
755    }
756
757    // Remaining characters: type token(s), optional trailing lookarounds (issue #9)
758    let mut type_str = String::new();
759    for ch in chars {
760        type_str.push(ch);
761    }
762
763    if type_str == "%" {
764        spec.field_type = FieldType::Percentage;
765        return Ok(());
766    }
767    if type_str.starts_with('%') {
768        crate::reject_lookaround_in_strftime(&type_str).map_err(FormatParseError::PatternError)?;
769        spec.field_type = FieldType::DateTimeStrftime;
770        spec.strftime_format = Some(type_str.clone());
771        return Ok(());
772    }
773
774    let (type_base, lookaround_tail) = crate::split_type_base_and_lookaround_tail(&type_str);
775    if type_base.is_empty() && !lookaround_tail.is_empty() {
776        return Err(FormatParseError::PatternError(
777            "Type specification must precede lookaround assertions".to_string(),
778        ));
779    }
780
781    // Extract type name (alphabetic characters only) from the type base (not from lookarounds)
782    let type_name: String = type_base.chars().filter(|c| c.is_alphabetic()).collect();
783
784    spec.field_type = if type_name.is_empty() {
785        FieldType::String
786    } else if type_name == "ti" {
787        FieldType::DateTimeISO
788    } else if type_name == "te" {
789        FieldType::DateTimeRFC2822
790    } else if type_name == "tg" {
791        FieldType::DateTimeGlobal
792    } else if type_name == "ta" {
793        FieldType::DateTimeUS
794    } else if type_name == "tc" {
795        FieldType::DateTimeCtime
796    } else if type_name == "th" {
797        FieldType::DateTimeHTTP
798    } else if type_name == "tt" {
799        FieldType::DateTimeTime
800    } else if type_name == "ts" {
801        FieldType::DateTimeSystem
802    } else if type_name == "brace" {
803        FieldType::BracedContent
804    } else if type_name == "ml" {
805        FieldType::Multiline
806    } else if type_name == "blk" {
807        FieldType::IndentBlock
808    } else if type_name.len() > 1 {
809        FieldType::Custom(type_name)
810    } else {
811        let type_char = type_name.chars().next().unwrap();
812        spec.original_type_char = Some(type_char);
813        match type_char {
814            's' => FieldType::String,
815            'd' | 'i' => FieldType::Integer,
816            'b' | 'o' | 'x' | 'X' => FieldType::Integer,
817            'n' => FieldType::NumberWithThousands,
818            'f' | 'F' => FieldType::Float,
819            'e' | 'E' => FieldType::Scientific,
820            'g' | 'G' => FieldType::GeneralNumber,
821            'l' => FieldType::Letters,
822            'w' => FieldType::Word,
823            'W' => FieldType::NonLetters,
824            'S' => FieldType::NonWhitespace,
825            'D' => FieldType::NonDigits,
826            c => FieldType::Custom(c.to_string()),
827        }
828    };
829
830    if !lookaround_tail.is_empty() {
831        let (lb, la) = crate::parse_lookaround_tail(lookaround_tail)
832            .map_err(FormatParseError::PatternError)?;
833        match &spec.field_type {
834            FieldType::Integer | FieldType::Float => {
835                spec.regex_lookbehind = if lb.is_empty() { None } else { Some(lb) };
836                spec.regex_lookahead = if la.is_empty() { None } else { Some(la) };
837            }
838            _ => {
839                return Err(FormatParseError::PatternError("Lookaround assertions are only supported for integer and float format types (d, i, b, o, x, X, f, F)".to_string()));
840            }
841        }
842    }
843
844    Ok(())
845}
846
/// Unit tests for `normalize_field_name` with bracketed (dict-style) field names.
#[cfg(test)]
mod normalize_field_name_tests {
    use super::normalize_field_name;
    use std::collections::HashMap;

    /// Each `[key]` subscript in a field name is expected to become `_key`.
    #[test]
    fn dict_style_brackets_map_to_underscores() {
        // One mapping table is shared by both calls, in the same order as listed.
        let mut name_map = HashMap::new();
        let taken: Vec<Option<String>> = Vec::new();
        let cases = [
            ("hello[world]", "hello_world"),
            ("hello[foo][baz]", "hello_foo_baz"),
        ];
        for (raw, expected) in cases {
            assert_eq!(normalize_field_name(raw, &mut name_map, &taken), expected);
        }
    }

    /// Brackets nested inside brackets are expected to flatten to one underscore per level.
    #[test]
    fn deep_nested_brackets_normalize() {
        let mut name_map = HashMap::new();
        let normalized = normalize_field_name("a[b[c[d]]]", &mut name_map, &[]);
        assert_eq!(normalized, "a_b_c_d");
    }
}