Skip to main content

copybook_core/
parser.rs

1// SPDX-License-Identifier: AGPL-3.0-or-later
2//! COBOL copybook parser
3//!
4//! This module implements the parsing logic for COBOL copybooks,
5//! including lexical analysis and AST construction.
6
7use crate::error::error;
8use crate::error::{ErrorCode, ErrorContext};
9use crate::feature_flags::{Feature, FeatureFlags};
10use crate::lexer::{Lexer, Token, TokenPos};
11use crate::pic::PicClause;
12use crate::schema::{Field, FieldKind, Occurs, Schema, SignPlacement, SignSeparateInfo};
13use crate::utils::VecExt;
14use crate::{Error, Result};
15
/// Returns true if `s` is a valid COBOL data name (letter-start, alphanumeric + hyphen).
fn is_cobol_data_name(s: &str) -> bool {
    let mut rest = s.chars();
    // Empty strings fail here: `next()` yields `None`.
    rest.next().is_some_and(|first| first.is_ascii_alphabetic())
        && rest.all(|c| c.is_ascii_alphanumeric() || c == '-')
}
25
26/// Extract a data name from a token, accepting `Identifier` directly and
27/// `EditedPic`/`PicClause` when the text is a valid COBOL data name.
28fn try_extract_data_name(token_pos: &TokenPos) -> Option<String> {
29    match &token_pos.token {
30        Token::Identifier(name) => Some(name.clone()),
31        Token::EditedPic(text) | Token::PicClause(text) if is_cobol_data_name(text) => {
32            Some(text.clone())
33        }
34        _ => None,
35    }
36}
37
38/// Parse a COBOL copybook text into a schema
39///
40/// # Errors
41/// Returns an error if the copybook contains syntax errors or unsupported features.
42#[inline]
43#[must_use = "Handle the Result or propagate the error"]
44pub fn parse(text: &str) -> Result<Schema> {
45    parse_with_options(text, &ParseOptions::default())
46}
47
48/// Parse a COBOL copybook text into a schema with specific options
49///
50/// # Errors
51/// Returns an error if the copybook contains syntax errors or unsupported features.
52#[inline]
53#[must_use = "Handle the Result or propagate the error"]
54pub fn parse_with_options(text: &str, options: &ParseOptions) -> Result<Schema> {
55    if text.trim().is_empty() {
56        return Err(error!(ErrorCode::CBKP001_SYNTAX, "Empty copybook text"));
57    }
58
59    let tokens = Lexer::new_with_options(text, options).tokenize();
60    let mut parser = Parser::with_options(tokens, options.clone());
61    parser.parse_schema()
62}
63
/// Options for controlling COBOL copybook parsing behavior.
///
/// Configures how the parser handles various COBOL dialect features,
/// comment styles, and validation strictness.
#[derive(Debug, Clone)]
#[allow(clippy::struct_excessive_bools)] // independent toggles, not an encoded state machine
pub struct ParseOptions {
    /// Whether to emit FILLER fields in the parsed schema output.
    pub emit_filler: bool,
    /// Codepage identifier used for fingerprint calculation (e.g., `"cp037"`).
    pub codepage: String,
    /// Whether to allow COBOL-2002 inline comments (`*>`).
    pub allow_inline_comments: bool,
    /// Whether to run in strict mode with less error tolerance.
    pub strict: bool,
    /// Whether to enforce strict comment parsing rules.
    pub strict_comments: bool,
    /// Dialect for ODO `min_count` interpretation.
    pub dialect: crate::dialect::Dialect,
}
84
85impl Default for ParseOptions {
86    fn default() -> Self {
87        Self {
88            emit_filler: false,
89            codepage: "cp037".to_string(),
90            allow_inline_comments: true,
91            strict: false,
92            strict_comments: false,
93            dialect: crate::dialect::Dialect::Normative,
94        }
95    }
96}
97
/// Parser state for COBOL copybook parsing
struct Parser {
    // Token stream produced by the lexer, in source order.
    tokens: Vec<TokenPos>,
    // Index of the next token to consume from `tokens`.
    current: usize,
    // Options controlling dialect, FILLER emission, fingerprinting, etc.
    options: ParseOptions,
}
104
105impl Parser {
106    fn with_options(tokens: Vec<TokenPos>, options: ParseOptions) -> Self {
107        Self {
108            tokens,
109            current: 0,
110            options,
111        }
112    }
113
114    fn require_feature_enabled(
115        &self,
116        feature: Feature,
117        field_name: &str,
118        feature_name: &str,
119        syntax: &str,
120    ) -> Result<()> {
121        if FeatureFlags::global().is_enabled(feature) {
122            return Ok(());
123        }
124
125        Err(Error::new(
126            ErrorCode::CBKP011_UNSUPPORTED_CLAUSE,
127            format!(
128                "{syntax} is not supported for field '{field_name}' (enable with --enable-features {feature_name})"
129            ),
130        ))
131    }
132
133    /// Parse the complete schema
134    fn parse_schema(&mut self) -> Result<Schema> {
135        // Skip any leading comments or empty lines
136        self.skip_comments_and_newlines()?;
137
138        // Parse all field definitions into a flat list first
139        let mut flat_fields = Vec::new();
140        while !self.is_at_end() {
141            if let Some(field) = self.parse_field()? {
142                flat_fields.push(field);
143            }
144            self.skip_comments_and_newlines()?;
145        }
146
147        if flat_fields.is_empty() {
148            return Err(error!(
149                ErrorCode::CBKP001_SYNTAX,
150                "No valid field definitions found"
151            ));
152        }
153
154        // Build hierarchical structure from flat fields
155        let mut hierarchical_fields = self.build_hierarchy(flat_fields)?;
156
157        // Disambiguate duplicate sibling names and recompute paths
158        finalize_names_and_paths(&mut hierarchical_fields, None, self.options.emit_filler);
159
160        // Validate the structure (REDEFINES targets, ODO constraints, etc.)
161        self.validate_structure(&hierarchical_fields)?;
162
163        // Create schema with fingerprint
164        let mut schema = Schema::from_fields(hierarchical_fields);
165
166        // Resolve field layouts and compute offsets (dialect affects ODO min_count)
167        crate::layout::resolve_layout(&mut schema, self.options.dialect)?;
168
169        self.calculate_schema_fingerprint(&mut schema);
170
171        Ok(schema)
172    }
173
    /// Build hierarchical structure from flat field list
    ///
    /// Maintains a stack of "open" scopes. Each incoming field closes every
    /// scope at the same or higher level number (attaching the closed field
    /// to its parent, or to `result` at top level) before being pushed.
    /// Special levels: 88 attaches to the immediately preceding field,
    /// 66 (RENAMES) closes all scopes back to the level-01 record and
    /// attaches there, 77 is an independent item emitted at top level.
    fn build_hierarchy(&mut self, mut flat_fields: Vec<Field>) -> Result<Vec<Field>> {
        if flat_fields.is_empty() {
            return Ok(Vec::new());
        }

        // Handle duplicate names and FILLER fields
        self.process_field_names(&mut flat_fields);

        // Build hierarchical structure using a stack-based approach
        let mut stack: Vec<Field> = Vec::new();
        let mut result: Vec<Field> = Vec::new();

        for mut field in flat_fields {
            // Set initial path
            field.path = field.name.clone();

            // Special handling for level-88 (condition values) and level-66 (RENAMES)
            if field.level == 88 {
                // Level-88 is a child of the immediately preceding field
                if let Some(parent) = stack.last_mut() {
                    field.path = format!("{}.{}", parent.path, field.name);
                    parent.children.push(field);
                } else {
                    return Err(Error::new(
                        ErrorCode::CBKP001_SYNTAX,
                        "Level-88 condition must follow a data field".to_string(),
                    ));
                }
                continue;
            }

            // Level-66 (RENAMES) is a non-storage sibling under the same parent group
            let is_renames = field.level == 66;

            // Special handling for RENAMES: close all scopes back to level-01
            if is_renames {
                // Level-66 should trigger closing of all open scopes (groups and leaf fields)
                // back to the level-01 record, just like encountering a new level-01 sibling would.
                // We pop fields and attach each to its parent as we go (like normal field processing),
                // stopping when we reach the level-01 record.
                while let Some(top) = stack.last() {
                    // Stop at level-01 (always keep it on stack)
                    if top.level == 1 {
                        break;
                    }

                    // Pop this field and attach it to its parent
                    let mut completed_field = stack.pop_or_cbkp_error(
                        ErrorCode::CBKP001_SYNTAX,
                        "Parser stack underflow while attaching RENAMES",
                    )?;

                    // Mark as group only if it has storage-bearing children (exclude level-88).
                    if completed_field
                        .children
                        .iter()
                        .any(|child| child.level != 88)
                    {
                        completed_field.kind = FieldKind::Group;
                    }

                    // Attach to parent (mirrors the normal field completion below)
                    if let Some(parent) = stack.last_mut() {
                        completed_field.path = format!("{}.{}", parent.path, completed_field.name);
                        parent.children.push(completed_field);
                    } else {
                        return Err(Error::new(
                            ErrorCode::CBKP001_SYNTAX,
                            "Level-66 RENAMES must be within a record group".to_string(),
                        ));
                    }
                }

                // Now attach the level-66 field itself as a sibling
                let parent = stack.last_mut().ok_or_else(|| {
                    Error::new(
                        ErrorCode::CBKP001_SYNTAX,
                        "Level-66 RENAMES must be within a record group".to_string(),
                    )
                })?;

                field.path = format!("{}.{}", parent.path, field.name);
                parent.children.push(field);
                continue;
            }

            // Pop fields from stack that are at same or higher level (normal fields)
            while let Some(top) = stack.last() {
                if top.level >= field.level {
                    let mut completed_field = stack.pop_or_cbkp_error(
                        ErrorCode::CBKP001_SYNTAX,
                        "Parser stack underflow: expected field to pop but stack was empty",
                    )?;

                    // If this field has storage-bearing children, make it a group
                    if completed_field
                        .children
                        .iter()
                        .any(|child| child.level != 88)
                    {
                        completed_field.kind = FieldKind::Group;
                    }

                    // Add to parent or result
                    if let Some(parent) = stack.last_mut() {
                        // Update path to include parent
                        completed_field.path = format!("{}.{}", parent.path, completed_field.name);
                        parent.children.push(completed_field);
                    } else {
                        result.push(completed_field);
                    }
                } else {
                    break;
                }
            }

            // Update path if we have a parent
            if let Some(parent) = stack.last() {
                field.path = format!("{}.{}", parent.path, field.name);
            }

            stack.push(field);
        }

        // Pop remaining fields from stack
        while let Some(mut field) = stack.pop() {
            // If this field has storage-bearing children, make it a group
            // (Level-88 conditions are non-storage and should not promote parent to Group)
            if field.children.iter().any(|child| child.level != 88) {
                field.kind = FieldKind::Group;
            }

            // Level-77 is an independent item, always goes to result (not under a parent)
            if field.level == 77 {
                result.push(field);
                continue;
            }

            // Add to parent or result
            if let Some(parent) = stack.last_mut() {
                parent.children.push(field);
            } else {
                result.push(field);
            }
        }

        Ok(result)
    }
323
324    /// Process field names for FILLER handling (on flat list before hierarchy)
325    fn process_field_names(&mut self, fields: &mut [Field]) {
326        for field in fields.iter_mut() {
327            if field.name.to_uppercase() == "FILLER" {
328                if self.options.emit_filler {
329                    // Replace FILLER with _filler_<offset> (offset will be calculated later in layout resolution)
330                    // For now, use a placeholder that will be updated
331                    field.name = format!("_filler_{}", 0);
332                } else {
333                    // Keep FILLER name for now, will be filtered out in layout resolution
334                    field.name = "FILLER".to_string();
335                }
336            }
337        }
338    }
339
340    /// Validate the parsed structure
341    fn validate_structure(&self, fields: &[Field]) -> Result<()> {
342        // Validate REDEFINES targets
343        self.validate_redefines(fields)?;
344
345        // Validate ODO constraints
346        self.validate_odo_constraints(fields)?;
347
348        Ok(())
349    }
350
351    /// Validate REDEFINES relationships
352    fn validate_redefines(&self, fields: &[Field]) -> Result<()> {
353        let all_fields = Self::collect_all_fields(fields);
354
355        for field in &all_fields {
356            if let Some(ref target) = field.redefines_of {
357                // Find the target field
358                let target_found = all_fields
359                    .iter()
360                    .any(|f| f.name == *target || f.path == *target);
361
362                if !target_found {
363                    return Err(Error::new(
364                        ErrorCode::CBKP001_SYNTAX,
365                        format!(
366                            "REDEFINES target '{}' not found for field '{}'",
367                            target, field.name
368                        ),
369                    ));
370                }
371            }
372        }
373
374        Ok(())
375    }
376
377    /// Validate ODO constraints using hierarchical structure
378    fn validate_odo_constraints(&self, fields: &[Field]) -> Result<()> {
379        // Collect all fields for counter lookups
380        let all_fields = Self::collect_all_fields(fields);
381
382        // Validate each field group hierarchically
383        for field in fields {
384            self.validate_odo_in_group(field, &all_fields, false)?;
385        }
386
387        Ok(())
388    }
389
390    /// Check if a field is inside a REDEFINES region by walking the path
391    fn is_inside_redefines(&self, field_path: &str, all_fields: &[&Field]) -> bool {
392        // Check all ancestor paths to see if any have redefines_of
393        for ancestor in all_fields {
394            if field_path.starts_with(&format!("{}.", ancestor.path))
395                && ancestor.redefines_of.is_some()
396            {
397                return true;
398            }
399        }
400        false
401    }
402
    /// Recursively validate ODO constraints within a field group
    ///
    /// Enforces, in order: no nested ODO (CBKP022), no ODO under REDEFINES
    /// (CBKP023), counter field exists and sits outside any REDEFINES/OCCURS
    /// region (CBKS121), and ODO arrays are the last storage sibling in their
    /// group (CBKP021).
    ///
    /// # Arguments
    /// * `field` - The field to validate
    /// * `all_fields` - All fields for counter lookups
    /// * `inside_occurs` - Whether we're already inside an OCCURS/ODO array
    fn validate_odo_in_group(
        &self,
        field: &Field,
        all_fields: &[&Field],
        inside_occurs: bool,
    ) -> Result<()> {
        // Check if this field is an ODO array
        if let Some(Occurs::ODO { counter_path, .. }) = &field.occurs {
            // O5: Check for nested ODO (ODO inside OCCURS/ODO)
            if inside_occurs {
                return Err(Error::new(
                    ErrorCode::CBKP022_NESTED_ODO,
                    format!(
                        "Nested ODO not supported: field '{}' has OCCURS DEPENDING ON inside another OCCURS/ODO array",
                        field.path
                    ),
                ));
            }

            // O6: Check for ODO inside REDEFINES region (ancestor or the field itself)
            if self.is_inside_redefines(&field.path, all_fields) || field.redefines_of.is_some() {
                return Err(Error::new(
                    ErrorCode::CBKP023_ODO_REDEFINES,
                    format!(
                        "ODO over REDEFINES not supported: field '{}' has OCCURS DEPENDING ON inside a REDEFINES region",
                        field.path
                    ),
                ));
            }
            // Find the counter field (matched by simple name or full path)
            let counter_field = all_fields
                .iter()
                .find(|f| f.name == *counter_path || f.path == *counter_path);

            if counter_field.is_none() {
                return Err(Error::new(
                    ErrorCode::CBKS121_COUNTER_NOT_FOUND,
                    format!(
                        "ODO counter field '{}' not found for array '{}'",
                        counter_path, field.name
                    ),
                ));
            }

            // Validate that counter is not inside REDEFINES or ODO region
            if let Some(counter) = counter_field {
                if counter.redefines_of.is_some() {
                    return Err(Error::new(
                        ErrorCode::CBKS121_COUNTER_NOT_FOUND,
                        format!(
                            "ODO counter '{}' cannot be inside a REDEFINES region",
                            counter_path
                        ),
                    ));
                }

                if self.is_inside_redefines(&counter.path, all_fields) {
                    return Err(Error::new(
                        ErrorCode::CBKS121_COUNTER_NOT_FOUND,
                        format!(
                            "ODO counter '{}' cannot be inside a REDEFINES region",
                            counter_path
                        ),
                    ));
                }

                // NOTE(review): this checks the counter's own `occurs`, not
                // whether an ancestor has OCCURS — confirm that is intended.
                if counter.occurs.is_some() {
                    return Err(Error::new(
                        ErrorCode::CBKS121_COUNTER_NOT_FOUND,
                        format!(
                            "ODO counter '{}' cannot be inside an ODO region",
                            counter_path
                        ),
                    ));
                }
            }
        }

        // Determine if we're now inside an OCCURS/ODO region
        let child_inside_occurs = inside_occurs || field.occurs.is_some();

        // Recursively validate children and check ODO tail constraints
        for (i, child) in field.children.iter().enumerate() {
            // Check if child is ODO and enforce tail position rule
            if child
                .occurs
                .as_ref()
                .is_some_and(|o| matches!(o, Occurs::ODO { .. }))
                && !self.is_odo_at_tail_sibling_based(child, &field.children, i)
            {
                return Err(Error::new(
                    ErrorCode::CBKP021_ODO_NOT_TAIL,
                    format!(
                        "ODO array '{}' must be last storage field under '{}'",
                        child.path, field.path
                    ),
                ));
            }

            // Recursively validate this child's subtree, passing down OCCURS context
            self.validate_odo_in_group(child, all_fields, child_inside_occurs)?;
        }

        Ok(())
    }
514
515    /// Check if ODO array is the last storage sibling (structural, sibling-based logic)
516    fn is_odo_at_tail_sibling_based(
517        &self,
518        _odo_field: &Field,
519        siblings: &[Field],
520        odo_index: usize,
521    ) -> bool {
522        // Check if there are any storage fields after this ODO field among siblings
523        !siblings
524            .iter()
525            .skip(odo_index + 1)
526            .any(|sibling| self.is_storage_field(sibling))
527        // Return true if no storage siblings found after ODO
528    }
529
530    /// Check if a field is a storage field (excludes level 88 and non-storage field types)
531    fn is_storage_field(&self, field: &Field) -> bool {
532        // Exclude level 88 (condition names)
533        if field.level == 88 {
534            return false;
535        }
536
537        // Check if field kind would have storage (independent of calculated length)
538        match &field.kind {
539            FieldKind::Group => {
540                // Groups have storage if they have storage children or aren't just containers
541                // For validation purposes, consider groups as having potential storage
542                true
543            }
544            FieldKind::Alphanum { .. }
545            | FieldKind::ZonedDecimal { .. }
546            | FieldKind::BinaryInt { .. }
547            | FieldKind::PackedDecimal { .. }
548            | FieldKind::EditedNumeric { .. }
549            | FieldKind::FloatSingle
550            | FieldKind::FloatDouble => true,
551            FieldKind::Condition { .. } => false, // Level-88 fields don't have storage
552            FieldKind::Renames { .. } => false,   // Level-66 fields don't have storage
553        }
554    }
555
556    /// Collect all fields in a flat list
557    fn collect_all_fields(fields: &[Field]) -> Vec<&Field> {
558        let mut result = Vec::new();
559        for field in fields {
560            result.push(field);
561            let children = Self::collect_all_fields(&field.children);
562            result.extend(children);
563        }
564        result
565    }
566
567    /// Calculate schema fingerprint using SHA-256 including parse options
568    fn calculate_schema_fingerprint(&self, schema: &mut Schema) {
569        use sha2::{Digest, Sha256};
570
571        // Use schema's canonical JSON representation
572        let canonical_json = schema.create_canonical_json();
573
574        // Create hasher and add canonical JSON
575        let mut hasher = Sha256::new();
576        hasher.update(canonical_json.as_bytes());
577
578        // Add parse-specific options that affect fingerprint
579        hasher.update(self.options.codepage.as_bytes());
580        hasher.update([if self.options.emit_filler { 1 } else { 0 }]);
581
582        // Compute final hash
583        let result = hasher.finalize();
584        schema.fingerprint = format!("{:x}", result);
585    }
586
    /// Parse a single field definition
    ///
    /// Returns `Ok(Some(field))` for a complete definition ending in a period,
    /// `Ok(None)` when the current token cannot start a field (the token is
    /// consumed so the caller's loop always makes progress), and an error for
    /// malformed definitions (invalid level, missing name, missing period,
    /// level-66 without RENAMES).
    fn parse_field(&mut self) -> Result<Option<Field>> {
        // Look for level number
        let level = match self.current_token() {
            Some(TokenPos {
                token: Token::Level(n),
                ..
            }) => {
                let level = *n;
                self.advance();
                level
            }
            Some(TokenPos {
                token: Token::Level66,
                ..
            }) => {
                // Parse 66-level (RENAMES) entries
                let level = 66;
                self.advance();
                level
            }
            Some(TokenPos {
                token: Token::Level77,
                ..
            }) => {
                let level = 77;
                self.advance();
                level
            }
            Some(TokenPos {
                token: Token::Level88,
                ..
            }) => {
                let level = 88;
                self.advance();
                level
            }
            Some(TokenPos {
                token: Token::Number(n),
                line,
                ..
            }) => {
                // Handle invalid level numbers more carefully
                // 0 and numbers > 49 are invalid as COBOL level numbers
                // But only report as error if they appear in level number position
                if (*n == 0) || (*n >= 50 && *n <= 99) {
                    let line_number = *line;
                    // Convert line number safely, omitting from context if conversion fails
                    // to avoid silently corrupting error information with u32::MAX
                    let safe_line_number = copybook_overflow::safe_usize_to_u32(
                        line_number,
                        "error context line number",
                    )
                    .ok();

                    return Err(Error::new(
                        ErrorCode::CBKP001_SYNTAX,
                        format!("Invalid level number '{}'", n),
                    )
                    .with_context(ErrorContext {
                        record_index: None,
                        field_path: None,
                        byte_offset: None,
                        line_number: safe_line_number,
                        details: None,
                    }));
                } else if *n >= 1 && *n <= 49 {
                    // Valid single-digit level numbers (1-49) without leading zeros
                    // Convert to proper level number
                    // `as u8` cannot truncate here: the range check bounds n to 1..=49
                    let level = *n as u8;
                    self.advance();
                    level
                } else {
                    // Large numbers (>= 100) are likely sequence numbers or other contexts
                    self.advance();
                    return Ok(None);
                }
            }
            _ => {
                // If we encounter an unrecognized token, advance to avoid infinite loop
                self.advance();
                return Ok(None);
            }
        };

        // Get field name (Identifier, or a PIC-like token that is a valid data name)
        let mut name = match self.current_token().and_then(try_extract_data_name) {
            Some(n) => {
                self.advance();
                n
            }
            _ => {
                return Err(Error::new(
                    ErrorCode::CBKP001_SYNTAX,
                    format!("Expected field name after level {}", level),
                ));
            }
        };

        // Handle FILLER fields (normalize case; filtered later unless emit_filler)
        if name.to_uppercase() == "FILLER" && !self.options.emit_filler {
            // For now, keep FILLER name - it will be processed later
            name = "FILLER".to_string();
        }

        // Parse field clauses until the terminating period
        let mut field = Field::new(level, name);

        while !self.check(&Token::Period) && !self.is_at_end() {
            self.parse_field_clause(&mut field)?;
        }

        // Expect period to end field definition
        if !self.consume(&Token::Period) {
            return Err(Error::new(
                ErrorCode::CBKP001_SYNTAX,
                format!("Expected period after field definition for {}", field.name),
            ));
        }

        // Validate that level-66 fields have RENAMES clause
        if level == 66 && !matches!(field.kind, FieldKind::Renames { .. }) {
            return Err(Error::new(
                ErrorCode::CBKP001_SYNTAX,
                format!("Level-66 field '{}' must have RENAMES clause", field.name),
            ));
        }

        Ok(Some(field))
    }
717
    /// Parse a field clause (PIC, USAGE, REDEFINES, etc.)
    ///
    /// Dispatches on the current token and mutates `field` accordingly.
    /// Unknown tokens are consumed and ignored so the parser keeps making
    /// progress. Feature-gated clauses (SIGN, COMP-1, COMP-2) return an error
    /// unless the corresponding feature flag is enabled.
    fn parse_field_clause(&mut self, field: &mut Field) -> Result<()> {
        match self.current_token() {
            Some(TokenPos {
                token: Token::Pic, ..
            }) => {
                self.advance();
                self.parse_pic_clause(field)?;
            }
            Some(TokenPos {
                token: Token::Usage,
                ..
            }) => {
                self.advance();
                self.parse_usage_clause(field)?;
            }
            Some(TokenPos {
                token: Token::Redefines,
                ..
            }) => {
                self.advance();
                self.parse_redefines_clause(field)?;
            }
            Some(TokenPos {
                token: Token::Renames,
                ..
            }) => {
                // RENAMES clause (level-66 only)
                // NOTE(review): unlike the other arms, the RENAMES token is not
                // advanced past here; presumably parse_renames consumes it — verify.
                if field.level == 66 {
                    self.parse_renames(field)?;
                } else {
                    return Err(Error::new(
                        ErrorCode::CBKP001_SYNTAX,
                        format!(
                            "RENAMES clause can only be used with level-66, not level {}",
                            field.level
                        ),
                    ));
                }
            }
            Some(TokenPos {
                token: Token::Occurs,
                ..
            }) => {
                self.advance();
                self.parse_occurs_clause(field)?;
            }
            Some(TokenPos {
                token: Token::Synchronized,
                ..
            }) => {
                self.advance();
                field.synchronized = true;
            }
            Some(TokenPos {
                token: Token::Value,
                ..
            }) => {
                if field.level == 88 {
                    self.advance();
                    self.parse_level88_value_clause(field)?;
                } else {
                    // Skip VALUE clauses for non-88 fields (metadata only)
                    self.skip_value_clause()?;
                }
            }
            Some(TokenPos {
                token: Token::Blank,
                ..
            }) => {
                self.advance();
                self.parse_blank_when_zero_clause(field)?;
            }
            Some(TokenPos {
                token: Token::Sign, ..
            }) => {
                self.advance();
                // Check if SIGN SEPARATE feature is enabled
                if FeatureFlags::global().is_enabled(Feature::SignSeparate) {
                    self.parse_sign_clause(field)?;
                } else {
                    return Err(Error::new(
                        ErrorCode::CBKP051_UNSUPPORTED_EDITED_PIC,
                        format!(
                            "SIGN clause on field '{}' is not supported (enable with --enable-features sign_separate)",
                            field.name
                        ),
                    ));
                }
            }
            Some(TokenPos {
                token: Token::Comp, ..
            }) => {
                self.advance();
                self.convert_to_binary_field(field)?;
            }
            Some(TokenPos {
                token: Token::Comp3,
                ..
            }) => {
                self.advance();
                self.convert_to_packed_field(field)?;
            }
            Some(TokenPos {
                token: Token::Comp1,
                ..
            }) => {
                self.advance();
                // COMP-1 (single-precision float) is feature-gated
                self.require_feature_enabled(Feature::Comp1, &field.name, "comp_1", "COMP-1")?;
                field.kind = FieldKind::FloatSingle;
            }
            Some(TokenPos {
                token: Token::Comp2,
                ..
            }) => {
                self.advance();
                // COMP-2 (double-precision float) is feature-gated
                self.require_feature_enabled(Feature::Comp2, &field.name, "comp_2", "COMP-2")?;
                field.kind = FieldKind::FloatDouble;
            }
            Some(TokenPos {
                token: Token::Binary,
                ..
            }) => {
                // BINARY is a synonym for COMP
                self.advance();
                self.convert_to_binary_field(field)?;
            }
            _ => {
                // Unknown clause - advance and continue
                self.advance();
            }
        }
        Ok(())
    }
851
    /// Parse PIC clause
    ///
    /// The lexer may split a single picture across several tokens (e.g.
    /// `S9(7)` followed by `V99`, or `$ZZ,ZZZ` + `.` + `99`), so this
    /// collects all parts into `pic_parts`, joins them, and hands the result
    /// to `PicClause::parse`. The parsed picture kind is then mapped onto
    /// `field.kind` (alphanumeric, zoned decimal, or edited numeric).
    ///
    /// Continuation tokens are only accepted from the same source line as
    /// the first PIC token, so a following field's tokens are not absorbed.
    ///
    /// # Errors
    /// Returns `CBKP001_SYNTAX` when no PIC text follows the PIC keyword,
    /// and propagates any error from `PicClause::parse`.
    fn parse_pic_clause(&mut self, field: &mut Field) -> Result<()> {
        // Collect PIC clause tokens - might be split across multiple tokens
        let mut pic_parts = Vec::new();

        // Track the starting line for same-line checks
        let token_line = self.current_token().map_or(0, |t| t.line);

        // First token should be a PIC clause or identifier
        match self.current_token() {
            Some(TokenPos {
                token: Token::PicClause(pic),
                ..
            }) => {
                pic_parts.push(pic.clone());
                self.advance();
            }
            Some(TokenPos {
                token: Token::EditedPic(pic),
                ..
            }) => {
                // Phase E1: Accept edited PIC and push to parts for parsing
                pic_parts.push(pic.clone());
                self.advance();
            }
            Some(TokenPos {
                token: Token::Identifier(id),
                ..
            }) => {
                // This might be part of a PIC clause like "S9(7)" followed by "V99"
                pic_parts.push(id.clone());
                self.advance();
            }
            Some(TokenPos {
                token: Token::Number(n),
                ..
            }) => {
                // Lexer tokenizes e.g. "999" as Number (priority 4 > PicClause priority 3)
                pic_parts.push(n.to_string());
                self.advance();
            }
            _ => {
                return Err(Error::new(
                    ErrorCode::CBKP001_SYNTAX,
                    "Expected PIC clause after PIC keyword",
                ));
            }
        }

        // Track whether we're inside parentheses
        let mut paren_depth: i32 = 0;

        // Collect repetition count and sign indicators if present
        while let Some(token) = self.current_token() {
            match &token.token {
                Token::LeftParen => {
                    paren_depth += 1;
                    pic_parts.push("(".to_string());
                    self.advance();
                }
                Token::Number(n) if paren_depth > 0 => {
                    // Repetition count inside parentheses, e.g. the 7 in 9(7).
                    pic_parts.push(n.to_string());
                    self.advance();
                }
                Token::RightParen if paren_depth > 0 => {
                    paren_depth -= 1;
                    pic_parts.push(")".to_string());
                    self.advance();
                }
                Token::Comma => {
                    // Comma in edited PIC patterns like $ZZ,ZZZ.99
                    pic_parts.push(",".to_string());
                    self.advance();
                }
                Token::EditedPic(ep) => {
                    // Continuation of edited PIC after comma/period
                    pic_parts.push(ep.clone());
                    self.advance();
                }
                Token::Period => {
                    // Period could be decimal point or sentence terminator.
                    // Only treat as decimal point if:
                    // 1. On the same line as the PIC start
                    // 2. The last part is NOT a closing paren (e.g., `PIC X(20).` → terminator)
                    // 3. Something follows on the same line that looks like more PIC
                    let t = token.clone();
                    let last_is_rparen = pic_parts
                        .last()
                        .is_some_and(|s| s == ")" || s.ends_with(')'));
                    if !last_is_rparen
                        && t.line == token_line
                        && let Some(next) = self.peek_next()
                    {
                        let is_pic_continuation = matches!(
                            next.token,
                            Token::Number(_)
                                | Token::PicClause(_)
                                | Token::EditedPic(_)
                                | Token::Identifier(_)
                        );
                        if next.line == token_line && is_pic_continuation {
                            pic_parts.push(".".to_string());
                            self.advance();
                            continue;
                        }
                    }
                    // Sentence terminator: stop collecting; the period is
                    // left for the caller to handle.
                    break;
                }
                Token::Number(n) => {
                    // Number outside parens on same line (e.g., "ZZZ9" split as EditedPic + Number)
                    let t = token.clone();
                    if t.line == token_line {
                        pic_parts.push(n.to_string());
                        self.advance();
                    } else {
                        break;
                    }
                }
                Token::Identifier(id) if id.starts_with('V') || id.starts_with('v') => {
                    // Implied-decimal continuation such as "V99".
                    pic_parts.push(id.clone());
                    self.advance();
                }
                // Collect sign-related identifiers (CR, DB, etc.)
                Token::Identifier(id) => {
                    let id_upper = id.to_ascii_uppercase();
                    if id_upper == "CR" || id_upper == "DB" {
                        pic_parts.push(id.clone());
                        self.advance();
                    } else {
                        break;
                    }
                }
                _ => break,
            }
        }

        let pic_str = pic_parts.join("");
        let pic = PicClause::parse(&pic_str)?;

        // Map the parsed picture onto the schema field kind.
        field.kind = match pic.kind {
            crate::pic::PicKind::Alphanumeric => FieldKind::Alphanum {
                len: pic.digits as u32,
            },
            crate::pic::PicKind::NumericDisplay => FieldKind::ZonedDecimal {
                digits: pic.digits,
                scale: pic.scale,
                signed: pic.signed,
                sign_separate: None,
            },
            crate::pic::PicKind::Edited => {
                // Phase E2: Parse edited PIC into schema with scale
                FieldKind::EditedNumeric {
                    pic_string: pic_str.clone(),
                    width: pic.digits,
                    scale: pic.scale as u16,
                    signed: pic.signed,
                }
            }
        };

        Ok(())
    }
1014
1015    /// Parse USAGE clause
1016    fn parse_usage_clause(&mut self, field: &mut Field) -> Result<()> {
1017        match self.current_token() {
1018            Some(TokenPos {
1019                token: Token::Display,
1020                ..
1021            }) => {
1022                self.advance();
1023                // USAGE DISPLAY is the default, no change needed
1024            }
1025            Some(TokenPos {
1026                token: Token::Comp, ..
1027            }) => {
1028                self.advance();
1029                self.convert_to_binary_field(field)?;
1030            }
1031            Some(TokenPos {
1032                token: Token::Comp3,
1033                ..
1034            }) => {
1035                self.advance();
1036                self.convert_to_packed_field(field)?;
1037            }
1038            Some(TokenPos {
1039                token: Token::Comp1,
1040                ..
1041            }) => {
1042                self.advance();
1043                self.require_feature_enabled(
1044                    Feature::Comp1,
1045                    &field.name,
1046                    "comp_1",
1047                    "USAGE COMP-1",
1048                )?;
1049                field.kind = FieldKind::FloatSingle;
1050            }
1051            Some(TokenPos {
1052                token: Token::Comp2,
1053                ..
1054            }) => {
1055                self.advance();
1056                self.require_feature_enabled(
1057                    Feature::Comp2,
1058                    &field.name,
1059                    "comp_2",
1060                    "USAGE COMP-2",
1061                )?;
1062                field.kind = FieldKind::FloatDouble;
1063            }
1064            Some(TokenPos {
1065                token: Token::Binary,
1066                ..
1067            }) => {
1068                self.advance();
1069                self.convert_to_binary_field(field)?;
1070            }
1071            _ => {
1072                return Err(Error::new(
1073                    ErrorCode::CBKP001_SYNTAX,
1074                    "Expected USAGE type after USAGE keyword",
1075                ));
1076            }
1077        }
1078        Ok(())
1079    }
1080
1081    /// Parse REDEFINES clause
1082    fn parse_redefines_clause(&mut self, field: &mut Field) -> Result<()> {
1083        let target = match self.current_token().and_then(try_extract_data_name) {
1084            Some(n) => {
1085                self.advance();
1086                n
1087            }
1088            _ => {
1089                return Err(Error::new(
1090                    ErrorCode::CBKP001_SYNTAX,
1091                    "Expected field name after REDEFINES",
1092                ));
1093            }
1094        };
1095
1096        field.redefines_of = Some(target);
1097        Ok(())
1098    }
1099
1100    /// Parse OCCURS clause
1101    fn parse_occurs_clause(&mut self, field: &mut Field) -> Result<()> {
1102        let min = match self.current_token() {
1103            Some(TokenPos {
1104                token: Token::Number(n),
1105                ..
1106            }) => {
1107                let min = *n;
1108                self.advance();
1109                min
1110            }
1111            // Accept Level tokens as numbers in OCCURS context (01-49 are lexed as Level)
1112            Some(TokenPos {
1113                token: Token::Level(n),
1114                ..
1115            }) => {
1116                let min = u32::from(*n);
1117                self.advance();
1118                min
1119            }
1120            _ => {
1121                return Err(Error::new(
1122                    ErrorCode::CBKP001_SYNTAX,
1123                    "Expected number after OCCURS",
1124                ));
1125            }
1126        };
1127
1128        // Check for TO keyword (range syntax)
1129        let max = if self.check(&Token::To) {
1130            self.advance(); // consume TO
1131            match self.current_token() {
1132                Some(TokenPos {
1133                    token: Token::Number(n),
1134                    ..
1135                }) => {
1136                    let max = *n;
1137                    self.advance();
1138                    max
1139                }
1140                // Accept Level tokens as numbers in OCCURS context (01-49 are lexed as Level)
1141                Some(TokenPos {
1142                    token: Token::Level(n),
1143                    ..
1144                }) => {
1145                    let max = u32::from(*n);
1146                    self.advance();
1147                    max
1148                }
1149                _ => {
1150                    return Err(Error::new(
1151                        ErrorCode::CBKP001_SYNTAX,
1152                        "Expected number after TO in OCCURS clause",
1153                    ));
1154                }
1155            }
1156        } else {
1157            min // If no TO, then min == max (fixed count)
1158        };
1159
1160        // Skip optional TIMES keyword
1161        if self.check(&Token::Times) {
1162            self.advance();
1163        }
1164
1165        // Look for DEPENDING ON (might not be immediately next)
1166        let depending_pos = self.find_depending_in_clause();
1167
1168        if let Some(depending_idx) = depending_pos {
1169            // Found DEPENDING, advance to it
1170            self.current = depending_idx;
1171            self.advance(); // consume DEPENDING
1172
1173            if !self.consume(&Token::On) {
1174                return Err(Error::new(
1175                    ErrorCode::CBKP001_SYNTAX,
1176                    "Expected ON after DEPENDING",
1177                ));
1178            }
1179
1180            let counter_field = match self.current_token().and_then(try_extract_data_name) {
1181                Some(n) => {
1182                    self.advance();
1183                    n
1184                }
1185                _ => {
1186                    return Err(Error::new(
1187                        ErrorCode::CBKP001_SYNTAX,
1188                        "Expected field name after DEPENDING ON",
1189                    ));
1190                }
1191            };
1192
1193            field.occurs = Some(Occurs::ODO {
1194                min,
1195                max,
1196                counter_path: counter_field,
1197            });
1198        } else {
1199            // No DEPENDING ON found
1200            if min != max {
1201                return Err(Error::new(
1202                    ErrorCode::CBKP001_SYNTAX,
1203                    "Range syntax (min TO max) requires DEPENDING ON clause",
1204                ));
1205            }
1206            field.occurs = Some(Occurs::Fixed { count: min });
1207        }
1208
1209        Ok(())
1210    }
1211
1212    /// Find DEPENDING token in the current clause (before the next field starts)
1213    fn find_depending_in_clause(&self) -> Option<usize> {
1214        for i in self.current..self.tokens.len() {
1215            match &self.tokens[i].token {
1216                Token::Depending => return Some(i),
1217                // Stop looking when we hit the start of a new field
1218                Token::Level(_) | Token::Level66 | Token::Level77 | Token::Level88 => break,
1219                _ => {}
1220            }
1221        }
1222        None
1223    }
1224
1225    /// Parse BLANK WHEN ZERO clause
1226    fn parse_blank_when_zero_clause(&mut self, field: &mut Field) -> Result<()> {
1227        if !self.consume(&Token::When) {
1228            return Err(Error::new(
1229                ErrorCode::CBKP001_SYNTAX,
1230                "Expected WHEN after BLANK",
1231            ));
1232        }
1233
1234        if !self.consume(&Token::Zero) {
1235            return Err(Error::new(
1236                ErrorCode::CBKP001_SYNTAX,
1237                "Expected ZERO after BLANK WHEN",
1238            ));
1239        }
1240
1241        field.blank_when_zero = true;
1242        Ok(())
1243    }
1244
    /// Parse SIGN clause (`SIGN [IS] [LEADING|TRAILING] [SEPARATE]` or
    /// `SIGN [IS] SEPARATE [LEADING|TRAILING]`).
    ///
    /// Only SIGN SEPARATE is supported; placement defaults to LEADING when
    /// no LEADING/TRAILING keyword appears. On success `field.kind` is
    /// rewritten to a signed `ZonedDecimal` carrying `SignSeparateInfo`.
    ///
    /// # Errors
    /// Returns `CBKP001_SYNTAX` for duplicate or unknown clause words, for
    /// SIGN without SEPARATE, for non-display-numeric fields, and when
    /// combined with BLANK WHEN ZERO.
    fn parse_sign_clause(&mut self, field: &mut Field) -> Result<()> {
        // Optional IS keyword
        self.consume(&Token::Is);

        // Parse SIGN clause components in either order:
        //   SIGN [IS] [SEPARATE] [LEADING|TRAILING]
        //   SIGN [IS] [LEADING|TRAILING] [SEPARATE]
        let mut placement = SignPlacement::Leading;
        let mut saw_placement = false;
        let mut saw_separate = false;

        // At most two clause words can legally appear, hence two iterations.
        for _ in 0..2 {
            if self.consume(&Token::Separate) {
                if saw_separate {
                    return Err(Error::new(
                        ErrorCode::CBKP001_SYNTAX,
                        "Invalid SIGN clause syntax: duplicate SEPARATE",
                    ));
                }
                saw_separate = true;
                continue;
            }

            if self.consume(&Token::Leading) {
                if saw_placement {
                    return Err(Error::new(
                        ErrorCode::CBKP001_SYNTAX,
                        "Invalid SIGN clause syntax: duplicate LEADING/TRAILING",
                    ));
                }
                placement = SignPlacement::Leading;
                saw_placement = true;
                continue;
            }

            if self.consume(&Token::Trailing) {
                if saw_placement {
                    return Err(Error::new(
                        ErrorCode::CBKP001_SYNTAX,
                        "Invalid SIGN clause syntax: duplicate LEADING/TRAILING",
                    ));
                }
                placement = SignPlacement::Trailing;
                saw_placement = true;
                continue;
            }

            break;
        }

        if !saw_separate {
            // SIGN without SEPARATE is not supported in this implementation
            // (it would use overpunching which is already handled by signed zoned decimals)
            return Err(Error::new(
                ErrorCode::CBKP001_SYNTAX,
                "SIGN clause without SEPARATE is not supported (use S in PIC clause for overpunching)",
            ));
        }

        // Do not allow unknown tokens to silently attach to the SIGN clause.
        // The next token should be a field clause boundary or the field terminator.
        if let Some(next) = self.current_token() {
            let next_is_boundary = matches!(
                &next.token,
                Token::Period
                    | Token::Newline
                    | Token::InlineComment(_)
                    | Token::Pic
                    | Token::Usage
                    | Token::Redefines
                    | Token::Renames
                    | Token::Occurs
                    | Token::Synchronized
                    | Token::Value
                    | Token::Blank
                    | Token::Sign
                    | Token::Comp
                    | Token::Comp3
                    | Token::Comp1
                    | Token::Comp2
                    | Token::Binary
            );

            if !next_is_boundary {
                return Err(Error::new(
                    ErrorCode::CBKP001_SYNTAX,
                    "Invalid SIGN clause syntax. Expected SEPARATE with optional LEADING/TRAILING",
                ));
            }
        }

        // Validate that field is a numeric display field
        match &mut field.kind {
            FieldKind::ZonedDecimal {
                digits,
                scale,
                signed,
                sign_separate: _,
            } => {
                *signed = true;
                // Add sign separate info to the field
                let sign_separate_info = SignSeparateInfo { placement };
                // Update field kind to include sign separate info
                field.kind = FieldKind::ZonedDecimal {
                    digits: *digits,
                    scale: *scale,
                    signed: true,
                    sign_separate: Some(sign_separate_info),
                };
            }
            _ => {
                return Err(Error::new(
                    ErrorCode::CBKP001_SYNTAX,
                    format!(
                        "SIGN SEPARATE clause can only be used with numeric display fields (PIC 9 or PIC S9), not field '{}'",
                        field.name
                    ),
                ));
            }
        }

        // Cannot combine SIGN SEPARATE with BLANK WHEN ZERO
        // NOTE(review): `field.kind` has already been rewritten above when
        // this error fires; presumably the caller discards the field on
        // error — confirm.
        if field.blank_when_zero {
            return Err(Error::new(
                ErrorCode::CBKP001_SYNTAX,
                "SIGN SEPARATE clause cannot be combined with BLANK WHEN ZERO",
            ));
        }

        Ok(())
    }
1378
1379    /// Parse Level-88 VALUE clause
1380    fn parse_level88_value_clause(&mut self, field: &mut Field) -> Result<()> {
1381        let mut values = Vec::new();
1382
1383        // Parse VALUE clauses - can be literals, ranges, or multiple values
1384        loop {
1385            match self.current_token() {
1386                Some(TokenPos {
1387                    token: Token::StringLiteral(s),
1388                    ..
1389                }) => {
1390                    values.push(s.clone());
1391                    self.advance();
1392                }
1393                Some(TokenPos {
1394                    token: Token::Number(n),
1395                    ..
1396                }) => {
1397                    values.push(n.to_string());
1398                    self.advance();
1399                }
1400                Some(TokenPos {
1401                    token: Token::Identifier(id),
1402                    ..
1403                }) if id.to_uppercase() == "ZEROS" || id.to_uppercase() == "ZEROES" => {
1404                    values.push("ZEROS".to_string());
1405                    self.advance();
1406                }
1407                Some(TokenPos {
1408                    token: Token::Identifier(id),
1409                    ..
1410                }) if id.to_uppercase() == "SPACES" => {
1411                    values.push("SPACES".to_string());
1412                    self.advance();
1413                }
1414                _ => break,
1415            }
1416
1417            // Check for THROUGH/THRU ranges or additional values
1418            let range_keyword = match self.current_token() {
1419                Some(TokenPos {
1420                    token: Token::Through,
1421                    ..
1422                }) => {
1423                    self.advance();
1424                    "THROUGH"
1425                }
1426                Some(TokenPos {
1427                    token: Token::Thru, ..
1428                }) => {
1429                    self.advance();
1430                    "THRU"
1431                }
1432                _ => {
1433                    // No range keyword, continue to next value/comma
1434                    if let Some(TokenPos {
1435                        token: Token::Comma,
1436                        ..
1437                    }) = self.current_token()
1438                    {
1439                        self.advance();
1440                    }
1441                    continue;
1442                }
1443            };
1444
1445            // Parse the range end value
1446            match self.current_token() {
1447                Some(TokenPos {
1448                    token: Token::StringLiteral(s),
1449                    ..
1450                }) => {
1451                    // Replace last value with range notation
1452                    if let Some(last) = values.last_mut() {
1453                        *last = format!("{} {} {}", last, range_keyword, s);
1454                    }
1455                    self.advance();
1456                }
1457                Some(TokenPos {
1458                    token: Token::Number(n),
1459                    ..
1460                }) => {
1461                    if let Some(last) = values.last_mut() {
1462                        *last = format!("{} {} {}", last, range_keyword, n);
1463                    }
1464                    self.advance();
1465                }
1466                _ => break,
1467            }
1468
1469            // Optionally consume a comma before the next value
1470            self.consume(&Token::Comma);
1471        }
1472
1473        if values.is_empty() {
1474            return Err(Error::new(
1475                ErrorCode::CBKP001_SYNTAX,
1476                "Level-88 VALUE clause requires at least one value",
1477            ));
1478        }
1479
1480        // Set field kind to Condition
1481        field.kind = FieldKind::Condition { values };
1482
1483        Ok(())
1484    }
1485
1486    /// Parse a qualified name (QNAME): IDENT (OF IDENT)*
1487    fn parse_qualified_name(&mut self) -> Result<String> {
1488        let mut parts = Vec::new();
1489
1490        // Parse first identifier
1491        match self.current_token().and_then(try_extract_data_name) {
1492            Some(n) => {
1493                parts.push(n);
1494                self.advance();
1495            }
1496            _ => {
1497                return Err(Error::new(
1498                    ErrorCode::CBKP001_SYNTAX,
1499                    "Expected identifier in qualified name",
1500                ));
1501            }
1502        }
1503
1504        // Parse optional "OF IDENT" sequences
1505        loop {
1506            match self.current_token() {
1507                Some(TokenPos {
1508                    token: Token::Identifier(name),
1509                    ..
1510                }) if name.eq_ignore_ascii_case("OF") => {
1511                    self.advance(); // consume OF
1512                    match self.current_token().and_then(try_extract_data_name) {
1513                        Some(n) => {
1514                            parts.push("OF".to_string());
1515                            parts.push(n);
1516                            self.advance();
1517                        }
1518                        _ => {
1519                            return Err(Error::new(
1520                                ErrorCode::CBKP001_SYNTAX,
1521                                "Expected identifier after OF in qualified name",
1522                            ));
1523                        }
1524                    }
1525                }
1526                _ => break,
1527            }
1528        }
1529
1530        Ok(parts.join(" "))
1531    }
1532
1533    /// Parse Level-66 RENAMES clause
1534    ///
1535    /// Syntax: 66 NAME RENAMES from-field THROUGH|THRU thru-field.
1536    /// Field names can be qualified: IDENT (OF IDENT)*
1537    fn parse_renames(&mut self, field: &mut Field) -> Result<()> {
1538        // Expect RENAMES keyword
1539        match self.current_token() {
1540            Some(TokenPos {
1541                token: Token::Renames,
1542                ..
1543            }) => {
1544                self.advance();
1545            }
1546            _ => {
1547                return Err(Error::new(
1548                    ErrorCode::CBKP001_SYNTAX,
1549                    "Expected RENAMES keyword after level-66 field name",
1550                ));
1551            }
1552        }
1553
1554        // Parse from-field qualified name
1555        let from_field = self.parse_qualified_name()?;
1556
1557        let thru_field = if self.check(&Token::Through) || self.check(&Token::Thru) {
1558            self.advance();
1559            self.parse_qualified_name()?
1560        } else {
1561            // R5 support uses single-field RENAMES without explicit THROUGH/THRU.
1562            from_field.clone()
1563        };
1564
1565        // Set field kind to Renames
1566        field.kind = FieldKind::Renames {
1567            from_field,
1568            thru_field,
1569        };
1570
1571        Ok(())
1572    }
1573
1574    /// Skip VALUE clause (not needed for layout)
1575    fn skip_value_clause(&mut self) -> Result<()> {
1576        self.advance(); // consume VALUE
1577
1578        // Skip until we find a keyword or period
1579        while !self.is_at_end() && !self.check(&Token::Period) {
1580            if self.is_keyword() {
1581                break;
1582            }
1583            self.advance();
1584        }
1585
1586        Ok(())
1587    }
1588
1589    /// Convert numeric field to binary
1590    fn convert_to_binary_field(&mut self, field: &mut Field) -> Result<()> {
1591        match &field.kind {
1592            FieldKind::ZonedDecimal { digits, signed, .. } => {
1593                let bits = match digits {
1594                    1..=4 => 16,
1595                    5..=9 => 32,
1596                    10..=18 => 64,
1597                    _ => {
1598                        return Err(Error::new(
1599                            ErrorCode::CBKP001_SYNTAX,
1600                            format!("Binary field with {} digits not supported", digits),
1601                        ));
1602                    }
1603                };
1604
1605                field.kind = FieldKind::BinaryInt {
1606                    bits,
1607                    signed: *signed,
1608                };
1609            }
1610            _ => {
1611                return Err(Error::new(
1612                    ErrorCode::CBKP001_SYNTAX,
1613                    "USAGE COMP/BINARY can only be applied to numeric fields",
1614                ));
1615            }
1616        }
1617        Ok(())
1618    }
1619
1620    /// Convert numeric field to packed decimal
1621    fn convert_to_packed_field(&mut self, field: &mut Field) -> Result<()> {
1622        match &field.kind {
1623            FieldKind::ZonedDecimal {
1624                digits,
1625                scale,
1626                signed,
1627                ..
1628            } => {
1629                field.kind = FieldKind::PackedDecimal {
1630                    digits: *digits,
1631                    scale: *scale,
1632                    signed: *signed,
1633                };
1634            }
1635            _ => {
1636                return Err(Error::new(
1637                    ErrorCode::CBKP001_SYNTAX,
1638                    "USAGE COMP-3 can only be applied to numeric fields",
1639                ));
1640            }
1641        }
1642        Ok(())
1643    }
1644
1645    /// Skip comments and newlines
1646    fn skip_comments_and_newlines(&mut self) -> Result<()> {
1647        while let Some(token) = self.current_token() {
1648            match &token.token {
1649                Token::InlineComment(_) => {
1650                    if self.options.strict_comments {
1651                        return Err(Error::new(
1652                            ErrorCode::CBKP001_SYNTAX,
1653                            "Inline comments (*>) are not allowed in strict mode",
1654                        ));
1655                    }
1656                    self.advance();
1657                }
1658                Token::Newline => {
1659                    self.advance();
1660                }
1661                _ => break,
1662            }
1663        }
1664        Ok(())
1665    }
1666
1667    /// Check if current token is a keyword
1668    fn is_keyword(&self) -> bool {
1669        matches!(
1670            self.current_token().map(|t| &t.token),
1671            Some(
1672                Token::Pic
1673                    | Token::Usage
1674                    | Token::Redefines
1675                    | Token::Occurs
1676                    | Token::Synchronized
1677                    | Token::Value
1678                    | Token::Blank
1679                    | Token::Sign
1680            )
1681        )
1682    }
1683
1684    /// Get current token
1685    fn current_token(&self) -> Option<&TokenPos> {
1686        self.tokens.get(self.current)
1687    }
1688
1689    /// Peek at the next token without advancing
1690    fn peek_next(&self) -> Option<&TokenPos> {
1691        self.tokens.get(self.current + 1)
1692    }
1693
1694    /// Advance to next token
1695    fn advance(&mut self) {
1696        if !self.is_at_end() {
1697            self.current += 1;
1698        }
1699    }
1700
1701    /// Check if we're at the end
1702    fn is_at_end(&self) -> bool {
1703        matches!(
1704            self.current_token().map(|t| &t.token),
1705            Some(Token::Eof) | None
1706        )
1707    }
1708
1709    /// Check if current token matches the given token
1710    fn check(&self, token: &Token) -> bool {
1711        if let Some(current) = self.current_token() {
1712            std::mem::discriminant(&current.token) == std::mem::discriminant(token)
1713        } else {
1714            false
1715        }
1716    }
1717
1718    /// Consume token if it matches, return true if consumed
1719    fn consume(&mut self, token: &Token) -> bool {
1720        if self.check(token) {
1721            self.advance();
1722            true
1723        } else {
1724            false
1725        }
1726    }
1727}
1728
1729/// Disambiguate duplicate names among true siblings (post-hierarchy) and recompute paths.
1730///
1731/// Walks the hierarchical field tree. At each level, siblings sharing a name
1732/// get `__dup2`, `__dup3`, … suffixes. Paths are rebuilt from the parent path
1733/// after renaming so they stay consistent.
1734fn finalize_names_and_paths(fields: &mut [Field], parent_path: Option<&str>, emit_filler: bool) {
1735    use std::collections::HashMap;
1736
1737    let mut counts: HashMap<String, u32> = HashMap::new();
1738
1739    for field in fields.iter_mut() {
1740        // Skip FILLER fields that won't be emitted — they don't need unique names
1741        if field.name == "FILLER" && !emit_filler {
1742            field.path = match parent_path {
1743                Some(parent) => format!("{parent}.{}", field.name),
1744                None => field.name.clone(),
1745            };
1746            continue;
1747        }
1748
1749        let base = field.name.clone();
1750        let count = counts.entry(base.clone()).or_insert(0);
1751        *count += 1;
1752        if *count > 1 {
1753            field.name = format!("{base}__dup{count}");
1754        }
1755
1756        field.path = match parent_path {
1757            Some(parent) => format!("{parent}.{}", field.name),
1758            None => field.name.clone(),
1759        };
1760    }
1761
1762    for field in fields.iter_mut() {
1763        let parent = field.path.clone();
1764        finalize_names_and_paths(&mut field.children, Some(&parent), emit_filler);
1765    }
1766}
1767
#[cfg(test)]
// Tests legitimately unwrap/expect/panic on fixture input; allow once here
// instead of repeating the attribute (the original stacked three overlapping
// `#[allow]`s).
#[allow(clippy::unwrap_used, clippy::expect_used, clippy::panic)]
mod tests {
    use super::*;

    #[test]
    fn test_simple_field_parsing() {
        let input = "01 CUSTOMER-ID PIC X(10).";
        // A parse failure here is a test failure — no need to launder the
        // error through an unrelated error code first.
        let schema = parse(input).expect("Failed to parse copybook");

        assert_eq!(schema.fields.len(), 1);
        let field = &schema.fields[0];
        assert_eq!(field.name, "CUSTOMER-ID");
        assert_eq!(field.level, 1);
        assert!(matches!(field.kind, FieldKind::Alphanum { len: 10 }));
    }

    #[test]
    fn test_numeric_field_parsing() {
        let input = "01 AMOUNT PIC S9(7)V99.";
        let schema = parse(input).unwrap();

        assert_eq!(schema.fields.len(), 1);
        let field = &schema.fields[0];
        assert_eq!(field.name, "AMOUNT");
        assert!(matches!(
            field.kind,
            FieldKind::ZonedDecimal {
                digits: 9,
                scale: 2,
                signed: true,
                ..
            }
        ));
    }

    #[test]
    fn test_binary_field_parsing() {
        let input = "01 COUNT PIC 9(5) USAGE COMP.";
        let schema = parse(input).unwrap();

        assert_eq!(schema.fields.len(), 1);
        let field = &schema.fields[0];
        assert!(matches!(
            field.kind,
            FieldKind::BinaryInt {
                bits: 32,
                signed: false
            }
        ));
    }

    #[test]
    fn test_occurs_parsing() {
        let input = "01 ARRAY-FIELD PIC X(10) OCCURS 5 TIMES.";
        let schema = parse(input).unwrap();

        assert_eq!(schema.fields.len(), 1);
        let field = &schema.fields[0];
        assert!(matches!(field.occurs, Some(Occurs::Fixed { count: 5 })));
    }

    #[test]
    fn test_redefines_parsing() {
        let input = r"
01 FIELD-A PIC X(10).
01 FIELD-B REDEFINES FIELD-A PIC 9(10).
";
        let schema = parse(input).unwrap();

        assert_eq!(schema.fields.len(), 2);
        let field_b = &schema.fields[1];
        assert_eq!(field_b.redefines_of, Some("FIELD-A".to_string()));
    }

    #[test]
    fn test_edited_pic_acceptance() {
        // Phase E1: Edited PIC is now supported and should parse successfully
        // Note: Complex decimal patterns not yet supported; using simple pattern
        let input = "01 AMOUNT PIC ZZZZ.";
        let result = parse(input);

        assert!(result.is_ok(), "Edited PIC should parse successfully");
        let schema = result.unwrap();
        assert_eq!(schema.fields.len(), 1);
        assert!(matches!(
            schema.fields[0].kind,
            FieldKind::EditedNumeric { .. }
        ));
    }

    #[test]
    fn test_sign_clause_without_separate_rejected() {
        // SIGN LEADING without SEPARATE is invalid — overpunching is handled by S in PIC.
        // Skip when sign_separate feature is disabled by env (e.g. COPYBOOK_FF_SIGN_SEPARATE=0).
        let flags = FeatureFlags::from_env();
        if !flags.is_enabled(Feature::SignSeparate) {
            return;
        }
        let input = "01 AMOUNT PIC S9(5) SIGN LEADING.";
        let result = parse(input);

        assert!(
            result.is_err(),
            "SIGN clause without SEPARATE should be rejected"
        );
        let error = result.unwrap_err();
        // SIGN SEPARATE is enabled by default; rejection is a syntax error, not feature-flag error
        assert_eq!(error.code, ErrorCode::CBKP001_SYNTAX);
    }

    #[test]
    fn test_sign_leading_separate_accepted() {
        // SIGN IS LEADING SEPARATE is always accepted (promoted to stable)
        // Skip when sign_separate feature is disabled by env (e.g. COPYBOOK_FF_SIGN_SEPARATE=0).
        let flags = FeatureFlags::from_env();
        if !flags.is_enabled(Feature::SignSeparate) {
            return;
        }
        let input = "01 AMOUNT PIC S9(5) SIGN IS LEADING SEPARATE.";
        let result = parse(input);
        assert!(
            result.is_ok(),
            "SIGN IS LEADING SEPARATE should be accepted"
        );
    }

    #[test]
    fn test_sign_trailing_separate_accepted() {
        // SIGN TRAILING SEPARATE is always accepted (promoted to stable)
        // Skip when sign_separate feature is disabled by env (e.g. COPYBOOK_FF_SIGN_SEPARATE=0).
        let flags = FeatureFlags::from_env();
        if !flags.is_enabled(Feature::SignSeparate) {
            return;
        }
        let input = "01 AMOUNT PIC S9(5) SIGN TRAILING SEPARATE.";
        let result = parse(input);
        assert!(result.is_ok(), "SIGN TRAILING SEPARATE should be accepted");
    }

    #[test]
    fn test_schema_fingerprint() {
        let input = "01 CUSTOMER-ID PIC X(10).";
        let schema = parse(input).unwrap();

        // Should have a non-empty fingerprint
        assert!(!schema.fingerprint.is_empty());
        assert_ne!(schema.fingerprint, "placeholder");

        // Same input should produce same fingerprint
        let schema2 = parse(input).unwrap();
        assert_eq!(schema.fingerprint, schema2.fingerprint);
    }

    #[test]
    fn test_duplicate_name_handling() {
        let input = r"
01 RECORD-A.
   05 FIELD-NAME PIC X(10).
   05 FIELD-NAME PIC 9(5).
";
        let schema = parse(input).unwrap();

        // Should have one root field with hierarchical structure
        assert_eq!(schema.fields.len(), 1);
        let root = &schema.fields[0];
        assert_eq!(root.name, "RECORD-A");
        assert!(matches!(root.kind, FieldKind::Group));

        // Root should have 2 children with disambiguated names
        assert_eq!(root.children.len(), 2);
        assert_eq!(root.children[0].name, "FIELD-NAME");
        assert_eq!(root.children[1].name, "FIELD-NAME__dup2");
    }

    #[test]
    fn test_parent_child_same_name_does_not_get_dup_suffix() {
        let input = "01 U-J.\n   05 U-J PIC X(5).";
        let schema = parse(input).unwrap();
        let group = &schema.fields[0];
        let child = &group.children[0];
        assert_eq!(child.name, "U-J");
        assert_eq!(child.path, "U-J.U-J");
    }

    #[test]
    fn test_odo_validation() {
        let input = r"
01 COUNTER PIC 9(3).
01 ARRAY-FIELD PIC X(10) OCCURS 5 TIMES DEPENDING ON COUNTER.
";
        let result = parse(input);

        // Should succeed with valid ODO structure
        assert!(result.is_ok());
        let schema = result.unwrap();
        assert_eq!(schema.fields.len(), 2);

        // Check ODO field
        let odo_field = &schema.fields[1];
        if let Some(Occurs::ODO {
            max, counter_path, ..
        }) = &odo_field.occurs
        {
            assert_eq!(*max, 5);
            assert_eq!(counter_path, "COUNTER");
        } else {
            panic!("Expected ODO occurs, got {:?}", odo_field.occurs);
        }
    }

    #[test]
    fn test_redefines_validation() {
        let input = r"
01 FIELD-A PIC X(10).
01 FIELD-B REDEFINES FIELD-A PIC 9(10).
";
        let result = parse(input);

        // Should succeed with valid REDEFINES
        assert!(result.is_ok());
        let schema = result.unwrap();
        assert_eq!(schema.fields.len(), 2);

        let field_b = &schema.fields[1];
        assert_eq!(field_b.redefines_of, Some("FIELD-A".to_string()));
    }

    #[test]
    fn test_hierarchical_structure() {
        let input = r"
01 CUSTOMER-RECORD.
   05 CUSTOMER-ID PIC X(10).
   05 CUSTOMER-NAME.
      10 FIRST-NAME PIC X(20).
      10 LAST-NAME PIC X(30).
   05 CUSTOMER-ADDRESS.
      10 STREET PIC X(38).
      10 CITY PIC X(30).
";
        let schema = parse(input).unwrap();

        // Should have one root field
        assert_eq!(schema.fields.len(), 1);
        let root = &schema.fields[0];
        assert_eq!(root.name, "CUSTOMER-RECORD");
        assert!(matches!(root.kind, FieldKind::Group));

        // Root should have 3 children
        assert_eq!(root.children.len(), 3);

        // Check first child
        let customer_id = &root.children[0];
        assert_eq!(customer_id.name, "CUSTOMER-ID");
        assert_eq!(customer_id.path, "CUSTOMER-RECORD.CUSTOMER-ID");
        assert!(matches!(customer_id.kind, FieldKind::Alphanum { len: 10 }));

        // Check second child (group)
        let customer_name = &root.children[1];
        assert_eq!(customer_name.name, "CUSTOMER-NAME");
        assert_eq!(customer_name.path, "CUSTOMER-RECORD.CUSTOMER-NAME");
        assert!(matches!(customer_name.kind, FieldKind::Group));
        assert_eq!(customer_name.children.len(), 2);

        // Check nested children
        let first_name = &customer_name.children[0];
        assert_eq!(first_name.name, "FIRST-NAME");
        assert_eq!(first_name.path, "CUSTOMER-RECORD.CUSTOMER-NAME.FIRST-NAME");
    }

    #[test]
    fn test_sha256_fingerprint() {
        let input = "01 CUSTOMER-ID PIC X(10).";
        let schema = parse(input).unwrap();

        // Should have a SHA-256 fingerprint (64 hex characters)
        assert_eq!(schema.fingerprint.len(), 64);
        assert!(schema.fingerprint.chars().all(|c| c.is_ascii_hexdigit()));

        // Same input should produce same fingerprint
        let schema2 = parse(input).unwrap();
        assert_eq!(schema.fingerprint, schema2.fingerprint);

        // Different input should produce different fingerprint
        let input2 = "01 CUSTOMER-ID PIC X(20).";
        let schema3 = parse(input2).unwrap();
        assert_ne!(schema.fingerprint, schema3.fingerprint);
    }

    #[test]
    fn test_invalid_redefines_target() {
        let input = r"
01 FIELD-A PIC X(10).
01 FIELD-B REDEFINES NONEXISTENT PIC 9(10).
";
        let result = parse(input);

        // Should fail with invalid REDEFINES target
        assert!(result.is_err());
        assert!(matches!(
            result.unwrap_err().code,
            ErrorCode::CBKP001_SYNTAX
        ));
    }

    #[test]
    fn test_blank_when_zero() {
        let input = "01 AMOUNT PIC 9(5) BLANK WHEN ZERO.";
        let schema = parse(input).unwrap();

        assert_eq!(schema.fields.len(), 1);
        let field = &schema.fields[0];
        assert!(field.blank_when_zero);
    }

    #[test]
    fn test_synchronized_field() {
        let input = "01 BINARY-FIELD PIC 9(5) USAGE COMP SYNCHRONIZED.";
        let schema = parse(input).unwrap();

        assert_eq!(schema.fields.len(), 1);
        let field = &schema.fields[0];
        assert!(field.synchronized);
        assert!(matches!(field.kind, FieldKind::BinaryInt { .. }));
    }

    #[test]
    fn test_edited_pic_ambiguous_field_name() {
        // "Z-Z" is tokenized as EditedPic but is a valid COBOL data name
        let input = "01 Z-Z PIC X(10).";
        let schema = parse(input).expect("should parse Z-Z as field name");
        assert_eq!(schema.fields.len(), 1);
        assert_eq!(schema.fields[0].name, "Z-Z");
    }

    #[test]
    fn test_pic_clause_ambiguous_field_name() {
        // "ZZ" is tokenized as PicClause/EditedPic but is a valid COBOL data name
        let input = "01 ZZ PIC X(5).";
        let schema = parse(input).expect("should parse ZZ as field name");
        assert_eq!(schema.fields.len(), 1);
        assert_eq!(schema.fields[0].name, "ZZ");
    }

    #[test]
    fn test_redefines_target_ambiguous_name() {
        let input = "01 Z-Z PIC X(10).\n01 ALT REDEFINES Z-Z PIC X(10).";
        let schema = parse(input).expect("should parse REDEFINES Z-Z");
        assert_eq!(schema.fields[1].redefines_of.as_deref(), Some("Z-Z"));
    }

    #[test]
    fn test_depending_on_ambiguous_name() {
        let input = "01 Z-Z PIC 9(3).\n01 ITEMS PIC X(5) OCCURS 5 TIMES DEPENDING ON Z-Z.";
        let schema = parse(input).expect("should parse DEPENDING ON Z-Z");
        let items = &schema.fields[1];
        match &items.occurs {
            Some(Occurs::ODO { counter_path, .. }) => {
                assert_eq!(counter_path, "Z-Z");
            }
            other => panic!("Expected ODO occurs, got {:?}", other),
        }
    }
}