Skip to main content

chipi_core/
parser.rs

1//! DSL parsing for instruction definitions.
2//!
3//! This module parses `.chipi` files (or source strings) into an intermediate representation.
4//! The parser handles:
5//! - Decoder configuration (name, width, bit order)
6//! - Type aliases for custom field types
7//! - Instruction definitions with fixed bit patterns and variable fields
8//!
9//! Bit ranges are converted from DSL notation (where the order depends on bit_order config)
10//! to hardware notation (LSB=0) during validation.
11
12use std::collections::HashSet;
13use std::path::{Path, PathBuf};
14
15use crate::error::{Error, ErrorKind, Span};
16use crate::format_parser::parse_format_string;
17use crate::types::*;
18
19/// Parse DSL source text into a decoder definition.
20///
21/// # Arguments
22///
23/// * `source` - DSL source code
24/// * `filename` - Name for error reporting (e.g., "my_arch.chipi")
25///
26/// # Returns
27///
28/// A `DecoderDef` on success, or a vector of parse errors.
29pub fn parse(source: &str, filename: &str) -> Result<DecoderDef, Vec<Error>> {
30    let mut parser = Parser::new(source, filename);
31    parser.parse_file()
32}
33
34/// Parse a `.chipi` file from a path, resolving `include` directives relative to its directory.
35pub fn parse_file(path: &Path) -> Result<DecoderDef, Vec<Error>> {
36    let mut include_guard = HashSet::new();
37    parse_file_with_includes(path, &mut include_guard)
38}
39
40/// Parse a `.chipi` file and return both the definition and the set of all
41/// resolved file paths (input + includes). Useful for `cargo:rerun-if-changed`.
42pub fn parse_file_with_deps(path: &Path) -> Result<(DecoderDef, Vec<PathBuf>), Vec<Error>> {
43    let mut include_guard = HashSet::new();
44    let def = parse_file_with_includes(path, &mut include_guard)?;
45    let deps: Vec<PathBuf> = include_guard.into_iter().collect();
46    Ok((def, deps))
47}
48
49fn parse_file_with_includes(
50    path: &Path,
51    include_guard: &mut HashSet<PathBuf>,
52) -> Result<DecoderDef, Vec<Error>> {
53    let canonical = path.canonicalize().map_err(|_| {
54        vec![Error::new(
55            ErrorKind::IncludeNotFound(path.display().to_string()),
56            Span::new(&path.display().to_string(), 1, 1, 0),
57        )]
58    })?;
59
60    if !include_guard.insert(canonical.clone()) {
61        return Err(vec![Error::new(
62            ErrorKind::CircularInclude(path.display().to_string()),
63            Span::new(&path.display().to_string(), 1, 1, 0),
64        )]);
65    }
66
67    let source = std::fs::read_to_string(&canonical).map_err(|_| {
68        vec![Error::new(
69            ErrorKind::IncludeNotFound(path.display().to_string()),
70            Span::new(&path.display().to_string(), 1, 1, 0),
71        )]
72    })?;
73
74    let filename = path
75        .file_name()
76        .and_then(|f| f.to_str())
77        .unwrap_or("unknown");
78
79    let base_dir = canonical.parent().unwrap_or(Path::new("."));
80
81    // First pass: find and resolve includes
82    let mut included_sub_decoders = Vec::new();
83    let mut included_maps = Vec::new();
84    let mut included_type_aliases = Vec::new();
85
86    for line in source.lines() {
87        let trimmed = line.trim();
88        if let Some(rest) = trimmed.strip_prefix("include ") {
89            let rest = rest.trim();
90            if let Some(inc_path_str) = extract_quoted_string(rest) {
91                let inc_path = base_dir.join(inc_path_str);
92                let inc_def = parse_file_with_includes(&inc_path, include_guard)?;
93                included_sub_decoders.extend(inc_def.sub_decoders);
94                included_maps.extend(inc_def.maps);
95                included_type_aliases.extend(inc_def.type_aliases);
96            }
97        }
98    }
99
100    // Second pass: parse this file
101    let mut def = parse(&source, filename)?;
102    // Merge included content
103    def.sub_decoders.extend(included_sub_decoders);
104    def.maps.extend(included_maps);
105    def.type_aliases.extend(included_type_aliases);
106
107    Ok(def)
108}
109
110struct Parser<'a> {
111    filename: String,
112    lines: Vec<&'a str>,
113    line_idx: usize,
114    errors: Vec<Error>,
115}
116
117impl<'a> Parser<'a> {
118    fn new(source: &'a str, filename: &str) -> Self {
119        let lines: Vec<&str> = source.lines().collect();
120        Parser {
121            filename: filename.to_string(),
122            lines,
123            line_idx: 0,
124            errors: Vec::new(),
125        }
126    }
127
128    fn span(&self, col: usize, len: usize) -> Span {
129        Span::new(&self.filename, self.line_idx + 1, col + 1, len)
130    }
131
132    fn advance(&mut self) {
133        self.line_idx += 1;
134    }
135
136    fn parse_file(&mut self) -> Result<DecoderDef, Vec<Error>> {
137        let mut config: Option<DecoderConfig> = None;
138        let mut type_aliases = Vec::new();
139        let mut maps = Vec::new();
140        let mut instructions = Vec::new();
141        let mut sub_decoders = Vec::new();
142        // Which sub-decoder are we currently collecting instructions for (None = main decoder)
143        let mut current_subdecoder: Option<usize> = None;
144
145        while self.line_idx < self.lines.len() {
146            let line = self.lines[self.line_idx];
147            let trimmed = line.trim();
148
149            if trimmed.is_empty() || trimmed.starts_with('#') {
150                self.advance();
151                continue;
152            }
153
154            // Skip include lines (handled in parse_file_with_includes)
155            if trimmed.starts_with("include ") {
156                self.advance();
157                continue;
158            }
159
160            if trimmed.starts_with("decoder ") {
161                current_subdecoder = None;
162                match self.parse_decoder_block() {
163                    Ok(cfg) => config = Some(cfg),
164                    Err(e) => self.errors.push(e),
165                }
166            } else if trimmed.starts_with("subdecoder ") {
167                match self.parse_subdecoder_block() {
168                    Ok(sd) => {
169                        sub_decoders.push(sd);
170                        current_subdecoder = Some(sub_decoders.len() - 1);
171                    }
172                    Err(e) => self.errors.push(e),
173                }
174            } else if trimmed.starts_with("type ") {
175                match self.parse_type_alias(trimmed) {
176                    Ok(ta) => type_aliases.push(ta),
177                    Err(e) => self.errors.push(e),
178                }
179                self.advance();
180            } else if trimmed.starts_with("map ") {
181                match self.parse_map_block() {
182                    Ok(map) => {
183                        if let Some(sd_idx) = current_subdecoder {
184                            sub_decoders[sd_idx].maps.push(map);
185                        } else {
186                            maps.push(map);
187                        }
188                    }
189                    Err(e) => self.errors.push(e),
190                }
191            } else {
192                // Instruction line
193                match self.parse_instruction(trimmed) {
194                    Ok(instr) => {
195                        if let Some(sd_idx) = current_subdecoder {
196                            // Sub-decoder instruction: parse fragment lines
197                            let sub_instr_name = instr.name.clone();
198                            let sub_instr_segments = instr.segments.clone();
199                            let sub_instr_span = instr.span.clone();
200                            self.advance();
201                            let fragments = self.parse_fragment_lines();
202                            sub_decoders[sd_idx].instructions.push(SubInstructionDef {
203                                name: sub_instr_name,
204                                segments: sub_instr_segments,
205                                fragments,
206                                span: sub_instr_span,
207                            });
208                        } else {
209                            instructions.push(instr);
210                            self.advance();
211                            let format_lines = self.parse_format_lines();
212                            if let Some(last) = instructions.last_mut() {
213                                last.format_lines = format_lines;
214                            }
215                        }
216                    }
217                    Err(e) => {
218                        self.errors.push(e);
219                        self.advance();
220                    }
221                }
222                continue; // already advanced
223            }
224        }
225
226        if !self.errors.is_empty() {
227            return Err(self.errors.clone());
228        }
229
230        let config = match config {
231            Some(c) => c,
232            None => {
233                // If we only have sub-decoders (included file), create a dummy config
234                if !sub_decoders.is_empty() && instructions.is_empty() {
235                    DecoderConfig {
236                        name: String::new(),
237                        width: 0,
238                        bit_order: BitOrder::Msb0,
239                        endian: ByteEndian::Big,
240                        max_units: None,
241                        span: Span::new(&self.filename, 1, 1, 0),
242                    }
243                } else {
244                    return Err(vec![Error::new(
245                        ErrorKind::MissingDecoderBlock,
246                        Span::new(&self.filename, 1, 1, 0),
247                    )]);
248                }
249            }
250        };
251
252        Ok(DecoderDef {
253            config,
254            type_aliases,
255            maps,
256            instructions,
257            sub_decoders,
258        })
259    }
260
261    fn parse_decoder_block(&mut self) -> Result<DecoderConfig, Error> {
262        let first_line = self.lines[self.line_idx].trim();
263        let block_start_line = self.line_idx;
264
265        let rest = first_line.strip_prefix("decoder ").unwrap().trim();
266        let name = rest
267            .strip_suffix('{')
268            .map(|s| s.trim())
269            .unwrap_or(rest)
270            .to_string();
271
272        if name.is_empty() {
273            return Err(Error::new(
274                ErrorKind::ExpectedToken("decoder name".to_string()),
275                self.span(8, 1),
276            ));
277        }
278
279        self.advance();
280
281        let mut width: Option<u32> = None;
282        let mut bit_order: Option<BitOrder> = None;
283        let mut endian: Option<ByteEndian> = None;
284        let mut max_units: Option<u32> = None;
285
286        // Parse body lines until '}'
287        while self.line_idx < self.lines.len() {
288            let line = self.lines[self.line_idx].trim();
289
290            if line == "}" {
291                self.advance();
292                break;
293            }
294
295            if line.is_empty() || line.starts_with('#') {
296                self.advance();
297                continue;
298            }
299
300            if let Some(val) = line.strip_prefix("width") {
301                let val = val
302                    .trim()
303                    .strip_prefix('=')
304                    .map(|s| s.trim())
305                    .unwrap_or(val.trim());
306                match val.parse::<u32>() {
307                    Ok(w) if w == 8 || w == 16 || w == 32 => width = Some(w),
308                    Ok(w) => {
309                        return Err(Error::new(
310                            ErrorKind::InvalidWidth(w),
311                            self.span(0, line.len()),
312                        ));
313                    }
314                    Err(_) => {
315                        return Err(Error::new(
316                            ErrorKind::ExpectedToken("integer width (8, 16, or 32)".to_string()),
317                            self.span(0, line.len()),
318                        ));
319                    }
320                }
321            } else if let Some(val) = line.strip_prefix("bit_order") {
322                let val = val
323                    .trim()
324                    .strip_prefix('=')
325                    .map(|s| s.trim())
326                    .unwrap_or(val.trim());
327                match val {
328                    "msb0" => bit_order = Some(BitOrder::Msb0),
329                    "lsb0" => bit_order = Some(BitOrder::Lsb0),
330                    _ => {
331                        return Err(Error::new(
332                            ErrorKind::ExpectedToken("msb0 or lsb0".to_string()),
333                            self.span(0, line.len()),
334                        ));
335                    }
336                }
337            } else if let Some(val) = line.strip_prefix("endian") {
338                let val = val
339                    .trim()
340                    .strip_prefix('=')
341                    .map(|s| s.trim())
342                    .unwrap_or(val.trim());
343                match val {
344                    "big" => endian = Some(ByteEndian::Big),
345                    "little" => endian = Some(ByteEndian::Little),
346                    _ => {
347                        return Err(Error::new(
348                            ErrorKind::ExpectedToken("big or little".to_string()),
349                            self.span(0, line.len()),
350                        ));
351                    }
352                }
353            } else if let Some(val) = line.strip_prefix("max_units") {
354                let val = val
355                    .trim()
356                    .strip_prefix('=')
357                    .map(|s| s.trim())
358                    .unwrap_or(val.trim());
359                match val.parse::<u32>() {
360                    Ok(m) if m > 0 => max_units = Some(m),
361                    Ok(_) => {
362                        return Err(Error::new(
363                            ErrorKind::ExpectedToken("positive integer for max_units".to_string()),
364                            self.span(0, line.len()),
365                        ));
366                    }
367                    Err(_) => {
368                        return Err(Error::new(
369                            ErrorKind::ExpectedToken("positive integer for max_units".to_string()),
370                            self.span(0, line.len()),
371                        ));
372                    }
373                }
374            }
375
376            self.advance();
377        }
378
379        let width = width.unwrap_or(32);
380        let bit_order = bit_order.unwrap_or(BitOrder::Msb0);
381        let endian = endian.unwrap_or(ByteEndian::Big);
382
383        Ok(DecoderConfig {
384            name,
385            width,
386            bit_order,
387            endian,
388            max_units,
389            span: Span::new(&self.filename, block_start_line + 1, 1, 0),
390        })
391    }
392
393    /// Parse a `subdecoder Name { ... }` block.
394    fn parse_subdecoder_block(&mut self) -> Result<SubDecoderDef, Error> {
395        let first_line = self.lines[self.line_idx].trim();
396        let block_start_line = self.line_idx;
397
398        let rest = first_line.strip_prefix("subdecoder ").unwrap().trim();
399        let name = rest
400            .strip_suffix('{')
401            .map(|s| s.trim())
402            .unwrap_or(rest)
403            .to_string();
404
405        if name.is_empty() {
406            return Err(Error::new(
407                ErrorKind::ExpectedToken("subdecoder name".to_string()),
408                self.span(12, 1),
409            ));
410        }
411
412        self.advance();
413
414        let mut width: Option<u32> = None;
415        let mut bit_order: Option<BitOrder> = None;
416
417        while self.line_idx < self.lines.len() {
418            let line = self.lines[self.line_idx].trim();
419
420            if line == "}" {
421                self.advance();
422                break;
423            }
424
425            if line.is_empty() || line.starts_with('#') {
426                self.advance();
427                continue;
428            }
429
430            if let Some(val) = line.strip_prefix("width") {
431                let val = val
432                    .trim()
433                    .strip_prefix('=')
434                    .map(|s| s.trim())
435                    .unwrap_or(val.trim());
436                match val.parse::<u32>() {
437                    Ok(w) if w == 8 || w == 16 || w == 32 => width = Some(w),
438                    Ok(w) => {
439                        return Err(Error::new(
440                            ErrorKind::InvalidWidth(w),
441                            self.span(0, line.len()),
442                        ));
443                    }
444                    Err(_) => {
445                        return Err(Error::new(
446                            ErrorKind::ExpectedToken("integer width (8, 16, or 32)".to_string()),
447                            self.span(0, line.len()),
448                        ));
449                    }
450                }
451            } else if let Some(val) = line.strip_prefix("bit_order") {
452                let val = val
453                    .trim()
454                    .strip_prefix('=')
455                    .map(|s| s.trim())
456                    .unwrap_or(val.trim());
457                match val {
458                    "msb0" => bit_order = Some(BitOrder::Msb0),
459                    "lsb0" => bit_order = Some(BitOrder::Lsb0),
460                    _ => {
461                        return Err(Error::new(
462                            ErrorKind::ExpectedToken("msb0 or lsb0".to_string()),
463                            self.span(0, line.len()),
464                        ));
465                    }
466                }
467            }
468
469            self.advance();
470        }
471
472        let width = width.unwrap_or(8);
473        let bit_order = bit_order.unwrap_or(BitOrder::Msb0);
474
475        Ok(SubDecoderDef {
476            name,
477            width,
478            bit_order,
479            maps: Vec::new(),
480            instructions: Vec::new(),
481            span: Span::new(&self.filename, block_start_line + 1, 1, 0),
482        })
483    }
484
485    /// Parse fragment lines (`| .name = "template"`) following a sub-decoder instruction.
486    fn parse_fragment_lines(&mut self) -> Vec<FragmentLine> {
487        let mut fragments = Vec::new();
488
489        while self.line_idx < self.lines.len() {
490            let line = self.lines[self.line_idx];
491            let trimmed = line.trim();
492
493            if !trimmed.starts_with('|') {
494                break;
495            }
496
497            let content = trimmed[1..].trim();
498            let span = self.span(0, line.len());
499
500            match self.parse_single_fragment_line(content, &span) {
501                Ok(fl) => fragments.push(fl),
502                Err(e) => self.errors.push(e),
503            }
504
505            self.advance();
506        }
507
508        fragments
509    }
510
511    /// Parse a single fragment line: `.name = "template"`
512    fn parse_single_fragment_line(
513        &self,
514        content: &str,
515        span: &Span,
516    ) -> Result<FragmentLine, Error> {
517        // Expect: .name = "template"
518        if !content.starts_with('.') {
519            return Err(Error::new(
520                ErrorKind::InvalidFormatString(
521                    "sub-decoder format line must start with '.name = \"...\"'".to_string(),
522                ),
523                span.clone(),
524            ));
525        }
526
527        let rest = &content[1..]; // skip '.'
528        let eq_pos = rest.find('=').ok_or_else(|| {
529            Error::new(
530                ErrorKind::ExpectedToken("'=' in fragment line".to_string()),
531                span.clone(),
532            )
533        })?;
534
535        let name = rest[..eq_pos].trim().to_string();
536        let rhs = rest[eq_pos + 1..].trim();
537
538        let fmt_str = extract_quoted_string(rhs).ok_or_else(|| {
539            Error::new(
540                ErrorKind::InvalidFormatString(
541                    "fragment line must contain a quoted string".to_string(),
542                ),
543                span.clone(),
544            )
545        })?;
546
547        let pieces = parse_format_string(fmt_str, span)?;
548
549        Ok(FragmentLine {
550            name,
551            pieces,
552            span: span.clone(),
553        })
554    }
555
556    fn parse_type_alias(&self, line: &str) -> Result<TypeAlias, Error> {
557        let rest = line.strip_prefix("type ").unwrap().trim();
558
559        let eq_pos = rest.find('=').ok_or_else(|| {
560            Error::new(
561                ErrorKind::ExpectedToken("'=' in type alias".to_string()),
562                self.span(5, rest.len()),
563            )
564        })?;
565
566        let name = rest[..eq_pos].trim().to_string();
567        let rhs = rest[eq_pos + 1..].trim();
568
569        let (base_str, transforms, display_format) = if let Some(brace_pos) = rhs.find('{') {
570            let close = rhs.rfind('}').ok_or_else(|| {
571                Error::new(
572                    ErrorKind::ExpectedToken("closing '}'".to_string()),
573                    self.span(0, line.len()),
574                )
575            })?;
576            let transforms_str = &rhs[brace_pos + 1..close];
577            let (transforms, display_format) = self.parse_transforms(transforms_str)?;
578            (rhs[..brace_pos].trim(), transforms, display_format)
579        } else {
580            (rhs, Vec::new(), None)
581        };
582
583        let base_type = base_str.to_string();
584
585        Ok(TypeAlias {
586            name,
587            base_type,
588            transforms,
589            display_format,
590            span: self.span(0, line.len()),
591        })
592    }
593
594    fn parse_transforms(&self, s: &str) -> Result<(Vec<Transform>, Option<DisplayFormat>), Error> {
595        let mut transforms = Vec::new();
596        let mut display_format = None;
597        for part in s.split(',') {
598            let part = part.trim();
599            if part.is_empty() {
600                continue;
601            }
602            if let Some(inner) = part
603                .strip_prefix("sign_extend(")
604                .and_then(|s| s.strip_suffix(')'))
605            {
606                let n: u32 = inner.trim().parse().map_err(|_| {
607                    Error::new(
608                        ErrorKind::ExpectedToken("integer for sign_extend".to_string()),
609                        self.span(0, 0),
610                    )
611                })?;
612                transforms.push(Transform::SignExtend(n));
613            } else if let Some(inner) = part
614                .strip_prefix("zero_extend(")
615                .and_then(|s| s.strip_suffix(')'))
616            {
617                let n: u32 = inner.trim().parse().map_err(|_| {
618                    Error::new(
619                        ErrorKind::ExpectedToken("integer for zero_extend".to_string()),
620                        self.span(0, 0),
621                    )
622                })?;
623                transforms.push(Transform::ZeroExtend(n));
624            } else if let Some(inner) = part
625                .strip_prefix("shift_left(")
626                .and_then(|s| s.strip_suffix(')'))
627            {
628                let n: u32 = inner.trim().parse().map_err(|_| {
629                    Error::new(
630                        ErrorKind::ExpectedToken("integer for shift_left".to_string()),
631                        self.span(0, 0),
632                    )
633                })?;
634                transforms.push(Transform::ShiftLeft(n));
635            } else if let Some(inner) = part
636                .strip_prefix("display(")
637                .and_then(|s| s.strip_suffix(')'))
638            {
639                let fmt = match inner.trim() {
640                    "signed_hex" => DisplayFormat::SignedHex,
641                    "hex" => DisplayFormat::Hex,
642                    other => {
643                        return Err(Error::new(
644                            ErrorKind::UnexpectedToken(format!(
645                                "unknown display format: {}",
646                                other
647                            )),
648                            self.span(0, part.len()),
649                        ));
650                    }
651                };
652                display_format = Some(fmt);
653            } else {
654                return Err(Error::new(
655                    ErrorKind::UnexpectedToken(part.to_string()),
656                    self.span(0, part.len()),
657                ));
658            }
659        }
660        Ok((transforms, display_format))
661    }
662
663    fn parse_instruction(&self, line: &str) -> Result<InstructionDef, Error> {
664        // Example: add rd:reg[6:10] ra:reg[11:15] rb:reg[16:20] [21:30]=0100001010 rc:u1[31]
665        // First token is the instruction name
666        let name_end = line.find(|c: char| c.is_whitespace()).unwrap_or(line.len());
667        let name = line[..name_end].to_string();
668        let rest = line[name_end..].trim();
669
670        let segments = self.parse_segments(rest)?;
671
672        Ok(InstructionDef {
673            name,
674            segments,
675            format_lines: Vec::new(),
676            span: self.span(0, line.len()),
677        })
678    }
679
680    fn parse_segments(&self, input: &str) -> Result<Vec<Segment>, Error> {
681        let mut segments = Vec::new();
682        let mut pos = 0;
683        let bytes = input.as_bytes();
684
685        while pos < input.len() {
686            while pos < input.len() && input.as_bytes()[pos].is_ascii_whitespace() {
687                pos += 1;
688            }
689            if pos >= input.len() {
690                break;
691            }
692
693            if bytes[pos] == b'[' {
694                let seg = self.parse_fixed_segment(input, &mut pos)?;
695                segments.push(seg);
696            } else {
697                // Field segment: name:type[start:end]
698                let seg = self.parse_field_segment(input, &mut pos)?;
699                segments.push(seg);
700            }
701        }
702
703        Ok(segments)
704    }
705
706    fn parse_fixed_segment(&self, input: &str, pos: &mut usize) -> Result<Segment, Error> {
707        let (range, _) = self.parse_bit_range(input, pos)?;
708
709        // Expect '='
710        if *pos >= input.len() || input.as_bytes()[*pos] != b'=' {
711            return Err(Error::new(
712                ErrorKind::ExpectedToken("'=' after bit range for fixed segment".to_string()),
713                self.span(*pos, 1),
714            ));
715        }
716        *pos += 1;
717
718        // Parse binary pattern (0, 1, or ?)
719        let start = *pos;
720        while *pos < input.len()
721            && (input.as_bytes()[*pos] == b'0'
722                || input.as_bytes()[*pos] == b'1'
723                || input.as_bytes()[*pos] == b'?')
724        {
725            *pos += 1;
726        }
727        let pattern_str = &input[start..*pos];
728        if pattern_str.is_empty() {
729            return Err(Error::new(
730                ErrorKind::InvalidBitPattern("empty pattern".to_string()),
731                self.span(start, 1),
732            ));
733        }
734
735        let pattern: Vec<Bit> = pattern_str
736            .chars()
737            .map(|c| match c {
738                '0' => Bit::Zero,
739                '1' => Bit::One,
740                '?' => Bit::Wildcard,
741                _ => unreachable!(),
742            })
743            .collect();
744
745        Ok(Segment::Fixed {
746            ranges: vec![range], // Single-unit range for now
747            pattern,
748            span: self.span(start, pattern_str.len()),
749        })
750    }
751
752    fn parse_field_segment(&self, input: &str, pos: &mut usize) -> Result<Segment, Error> {
753        // name:type[range] or name:type{transforms}[range]
754        let name_start = *pos;
755
756        // Parse field name (up to ':')
757        while *pos < input.len() && input.as_bytes()[*pos] != b':' {
758            *pos += 1;
759        }
760        let name = input[name_start..*pos].to_string();
761
762        if *pos >= input.len() || input.as_bytes()[*pos] != b':' {
763            return Err(Error::new(
764                ErrorKind::ExpectedToken("':' after field name".to_string()),
765                self.span(*pos, 1),
766            ));
767        }
768        *pos += 1; // skip ':'
769
770        // Parse type (up to '[' or '{')
771        let type_start = *pos;
772        while *pos < input.len() && input.as_bytes()[*pos] != b'[' && input.as_bytes()[*pos] != b'{'
773        {
774            *pos += 1;
775        }
776        let type_name = input[type_start..*pos].trim().to_string();
777
778        // Check for inline transforms
779        let transforms = if *pos < input.len() && input.as_bytes()[*pos] == b'{' {
780            *pos += 1; // skip '{'
781            let brace_start = *pos;
782            while *pos < input.len() && input.as_bytes()[*pos] != b'}' {
783                *pos += 1;
784            }
785            let transforms_str = &input[brace_start..*pos];
786            if *pos < input.len() {
787                *pos += 1; // skip '}'
788            }
789            Some(self.parse_transforms(transforms_str)?.0)
790        } else {
791            None
792        };
793
794        // Parse bit range
795        let (range, _) = self.parse_bit_range(input, pos)?;
796
797        let field_type = match transforms {
798            Some(t) => FieldType::Inline {
799                base_type: type_name,
800                transforms: t,
801            },
802            None => {
803                // Could be an alias or a builtin inline type
804                if is_builtin_type(&type_name) {
805                    FieldType::Inline {
806                        base_type: type_name,
807                        transforms: Vec::new(),
808                    }
809                } else {
810                    FieldType::Alias(type_name)
811                }
812            }
813        };
814
815        Ok(Segment::Field {
816            name,
817            field_type,
818            ranges: vec![range], // Single-unit range for now
819            span: self.span(name_start, *pos - name_start),
820        })
821    }
822
823    /// Parse a `map name(params) { ... }` block.
824    fn parse_map_block(&mut self) -> Result<MapDef, Error> {
825        let first_line = self.lines[self.line_idx].trim();
826        let block_start_line = self.line_idx;
827
828        let rest = first_line.strip_prefix("map ").unwrap().trim();
829
830        // Parse: name(param1, param2) {
831        let paren_pos = rest.find('(').ok_or_else(|| {
832            Error::new(
833                ErrorKind::ExpectedToken("'(' after map name".to_string()),
834                self.span(4, rest.len()),
835            )
836        })?;
837        let name = rest[..paren_pos].trim().to_string();
838
839        let close_paren = rest.find(')').ok_or_else(|| {
840            Error::new(
841                ErrorKind::ExpectedToken("')' in map definition".to_string()),
842                self.span(0, rest.len()),
843            )
844        })?;
845
846        let params_str = &rest[paren_pos + 1..close_paren];
847        let params: Vec<String> = params_str
848            .split(',')
849            .map(|s| s.trim().to_string())
850            .filter(|s| !s.is_empty())
851            .collect();
852
853        // Expect `{` after params
854        let after_paren = rest[close_paren + 1..].trim();
855        if !after_paren.starts_with('{') {
856            return Err(Error::new(
857                ErrorKind::ExpectedToken("'{' after map parameters".to_string()),
858                self.span(0, first_line.len()),
859            ));
860        }
861
862        self.advance();
863
864        let mut entries = Vec::new();
865
866        while self.line_idx < self.lines.len() {
867            let line = self.lines[self.line_idx].trim();
868
869            if line == "}" {
870                self.advance();
871                break;
872            }
873
874            if line.is_empty() || line.starts_with('#') {
875                self.advance();
876                continue;
877            }
878
879            // Parse: key1, key2 => output_text
880            let arrow_pos = line.find("=>").ok_or_else(|| {
881                Error::new(
882                    ErrorKind::ExpectedToken("'=>' in map entry".to_string()),
883                    self.span(0, line.len()),
884                )
885            })?;
886
887            let keys_str = &line[..arrow_pos];
888            let output_raw = line[arrow_pos + 2..].trim();
889
890            // Strip surrounding quotes from map output if present
891            let output_str = extract_quoted_string(output_raw).unwrap_or(output_raw);
892
893            let keys: Vec<MapKey> = keys_str
894                .split(',')
895                .map(|s| {
896                    let s = s.trim();
897                    if s == "_" {
898                        MapKey::Wildcard
899                    } else if let Some(hex) = s.strip_prefix("0x").or_else(|| s.strip_prefix("0X"))
900                    {
901                        MapKey::Value(i64::from_str_radix(hex, 16).unwrap_or(0))
902                    } else {
903                        MapKey::Value(s.parse::<i64>().unwrap_or(0))
904                    }
905                })
906                .collect();
907
908            let entry_span = self.span(0, line.len());
909            let output = parse_format_string(output_str, &entry_span)?;
910
911            entries.push(MapEntry {
912                keys,
913                output,
914                span: entry_span,
915            });
916
917            self.advance();
918        }
919
920        Ok(MapDef {
921            name,
922            params,
923            entries,
924            span: Span::new(&self.filename, block_start_line + 1, 1, 0),
925        })
926    }
927
928    /// Parse format lines (`| ...`) following an instruction.
929    fn parse_format_lines(&mut self) -> Vec<FormatLine> {
930        let mut format_lines = Vec::new();
931
932        while self.line_idx < self.lines.len() {
933            let line = self.lines[self.line_idx];
934            let trimmed = line.trim();
935
936            if !trimmed.starts_with('|') {
937                break;
938            }
939
940            let content = trimmed[1..].trim();
941            let span = self.span(0, line.len());
942
943            match self.parse_single_format_line(content, &span) {
944                Ok(fl) => format_lines.push(fl),
945                Err(e) => self.errors.push(e),
946            }
947
948            self.advance();
949        }
950
951        format_lines
952    }
953
954    /// Parse a single format line content (after stripping the leading `|`).
955    fn parse_single_format_line(&self, content: &str, span: &Span) -> Result<FormatLine, Error> {
956        // Check if there's a guard: content before first `"` that contains a `:`
957        // Format: `guard_expr: "format string"` or just `"format string"`
958        if let Some(quote_pos) = content.find('"') {
959            let before_quote = &content[..quote_pos];
960            let after_quote = &content[quote_pos..];
961
962            // Extract the quoted format string
963            let fmt_str = extract_quoted_string(after_quote).ok_or_else(|| {
964                Error::new(
965                    ErrorKind::InvalidFormatString("unclosed quote in format line".to_string()),
966                    span.clone(),
967                )
968            })?;
969
970            let guard = if before_quote.trim().is_empty() {
971                None
972            } else {
973                // Strip trailing `:` from guard
974                let guard_str = before_quote.trim().trim_end_matches(':').trim();
975                Some(parse_guard(guard_str, span)?)
976            };
977
978            let pieces = parse_format_string(fmt_str, span)?;
979
980            Ok(FormatLine {
981                guard,
982                pieces,
983                span: span.clone(),
984            })
985        } else {
986            Err(Error::new(
987                ErrorKind::InvalidFormatString(
988                    "format line must contain a quoted string".to_string(),
989                ),
990                span.clone(),
991            ))
992        }
993    }
994
995    /// Parse `[start:end]` or `[N]` bit range notation.
996    fn parse_bit_range(
997        &self,
998        input: &str,
999        pos: &mut usize,
1000    ) -> Result<(BitRange, (u32, u32)), Error> {
1001        if *pos >= input.len() || input.as_bytes()[*pos] != b'[' {
1002            return Err(Error::new(
1003                ErrorKind::ExpectedToken("'[' for bit range".to_string()),
1004                self.span(*pos, 1),
1005            ));
1006        }
1007        *pos += 1;
1008
1009        let num1_start = *pos;
1010        while *pos < input.len() && input.as_bytes()[*pos] != b':' && input.as_bytes()[*pos] != b']'
1011        {
1012            *pos += 1;
1013        }
1014        let num1: u32 = input[num1_start..*pos].trim().parse().map_err(|_| {
1015            Error::new(
1016                ErrorKind::InvalidRange,
1017                self.span(num1_start, *pos - num1_start),
1018            )
1019        })?;
1020
1021        let (dsl_start, dsl_end) = if *pos < input.len() && input.as_bytes()[*pos] == b':' {
1022            *pos += 1;
1023            let num2_start = *pos;
1024            while *pos < input.len() && input.as_bytes()[*pos] != b']' {
1025                *pos += 1;
1026            }
1027            let num2: u32 = input[num2_start..*pos].trim().parse().map_err(|_| {
1028                Error::new(
1029                    ErrorKind::InvalidRange,
1030                    self.span(num2_start, *pos - num2_start),
1031                )
1032            })?;
1033            (num1, num2)
1034        } else {
1035            (num1, num1)
1036        };
1037
1038        if *pos < input.len() && input.as_bytes()[*pos] == b']' {
1039            *pos += 1;
1040        } else {
1041            return Err(Error::new(
1042                ErrorKind::ExpectedToken("']'".to_string()),
1043                self.span(*pos, 1),
1044            ));
1045        }
1046
1047        // Conversion from DSL to hardware notation happens in validate() where width is known
1048        // Use BitRange::new() which defaults to unit 0
1049        Ok((BitRange::new(dsl_start, dsl_end), (dsl_start, dsl_end)))
1050    }
1051}
1052
/// Extract contents of a quoted string (strips surrounding `"`).
///
/// `s` is trimmed first and must then begin with `"`. The returned slice runs
/// from just after that opening quote up to (not including) the first `"`
/// that is not preceded by a backslash escape. Escape sequences are left in
/// the slice untouched, and any text after the closing quote is ignored.
///
/// Returns `None` when `s` does not start with `"` or the quote is never
/// closed.
fn extract_quoted_string(s: &str) -> Option<&str> {
    let inner = s.trim().strip_prefix('"')?;

    // Single pass over (byte offset, char) pairs: no intermediate buffer and
    // no second scan to translate a char index back into a byte offset.
    let mut chars = inner.char_indices();
    while let Some((offset, ch)) = chars.next() {
        match ch {
            // A backslash escapes the next character, whatever it is.
            '\\' => {
                chars.next();
            }
            '"' => return Some(&inner[..offset]),
            _ => {}
        }
    }
    None
}
1078
1079/// Parse a guard condition string like `ra == 0` or `ra == 0, lk == 1`.
1080fn parse_guard(s: &str, span: &Span) -> Result<Guard, Error> {
1081    let mut conditions = Vec::new();
1082
1083    // Split on `,` or `&&`
1084    let parts: Vec<&str> = if s.contains("&&") {
1085        s.split("&&").collect()
1086    } else {
1087        s.split(',').collect()
1088    };
1089
1090    for part in parts {
1091        let part = part.trim();
1092        if part.is_empty() {
1093            continue;
1094        }
1095
1096        let cond = parse_guard_condition(part, span)?;
1097        conditions.push(cond);
1098    }
1099
1100    if conditions.is_empty() {
1101        return Err(Error::new(
1102            ErrorKind::InvalidGuard("empty guard".to_string()),
1103            span.clone(),
1104        ));
1105    }
1106
1107    Ok(Guard { conditions })
1108}
1109
1110fn parse_guard_condition(s: &str, span: &Span) -> Result<GuardCondition, Error> {
1111    // Try each operator from longest to shortest
1112    let ops: &[(&str, CompareOp)] = &[
1113        ("!=", CompareOp::Ne),
1114        ("<=", CompareOp::Le),
1115        (">=", CompareOp::Ge),
1116        ("==", CompareOp::Eq),
1117        ("<", CompareOp::Lt),
1118        (">", CompareOp::Gt),
1119    ];
1120
1121    for &(op_str, ref op) in ops {
1122        if let Some(pos) = s.find(op_str) {
1123            let left = s[..pos].trim();
1124            let right = s[pos + op_str.len()..].trim();
1125
1126            return Ok(GuardCondition {
1127                left: parse_guard_operand(left, span)?,
1128                op: op.clone(),
1129                right: parse_guard_operand(right, span)?,
1130            });
1131        }
1132    }
1133
1134    Err(Error::new(
1135        ErrorKind::InvalidGuard(format!("no operator found in '{}'", s)),
1136        span.clone(),
1137    ))
1138}
1139
1140fn parse_guard_operand(s: &str, span: &Span) -> Result<GuardOperand, Error> {
1141    let s = s.trim();
1142    if s.is_empty() {
1143        return Err(Error::new(
1144            ErrorKind::InvalidGuard("empty operand".to_string()),
1145            span.clone(),
1146        ));
1147    }
1148
1149    // Try arithmetic: +, - at top level
1150    if let Some(op_pos) = find_guard_arith_op(s, &['+', '-']) {
1151        let left = s[..op_pos].trim();
1152        let op_char = s.as_bytes()[op_pos] as char;
1153        let right = s[op_pos + 1..].trim();
1154        let op = match op_char {
1155            '+' => ArithOp::Add,
1156            '-' => ArithOp::Sub,
1157            _ => unreachable!(),
1158        };
1159        return Ok(GuardOperand::Expr {
1160            left: Box::new(parse_guard_operand(left, span)?),
1161            op,
1162            right: Box::new(parse_guard_operand(right, span)?),
1163        });
1164    }
1165
1166    // Try arithmetic: *, /, % at top level
1167    if let Some(op_pos) = find_guard_arith_op(s, &['*', '/', '%']) {
1168        let left = s[..op_pos].trim();
1169        let op_char = s.as_bytes()[op_pos] as char;
1170        let right = s[op_pos + 1..].trim();
1171        let op = match op_char {
1172            '*' => ArithOp::Mul,
1173            '/' => ArithOp::Div,
1174            '%' => ArithOp::Mod,
1175            _ => unreachable!(),
1176        };
1177        return Ok(GuardOperand::Expr {
1178            left: Box::new(parse_guard_operand(left, span)?),
1179            op,
1180            right: Box::new(parse_guard_operand(right, span)?),
1181        });
1182    }
1183
1184    // Try integer literal
1185    if let Some(hex) = s.strip_prefix("0x").or_else(|| s.strip_prefix("0X")) {
1186        if let Ok(val) = i64::from_str_radix(hex, 16) {
1187            return Ok(GuardOperand::Literal(val));
1188        }
1189    }
1190    if let Ok(val) = s.parse::<i64>() {
1191        return Ok(GuardOperand::Literal(val));
1192    }
1193
1194    // Must be a field reference
1195    Ok(GuardOperand::Field(s.to_string()))
1196}
1197
/// Find the rightmost binary arithmetic operator in a guard operand string.
///
/// Returns the byte index of the last byte of `s` that is one of `ops` and is
/// used as a binary operator, or `None` when there is none. Two positions are
/// never reported because the character there is a sign, not a binary
/// operator:
///
/// * index 0, so `-1` stays a single operand;
/// * a `+` or `-` whose previous non-whitespace character is itself one of
///   `+ - * / %`, so `a * -1` splits at `*` and `a - -1` at the first `-`.
///
/// Nesting is not tracked: the scan is purely positional.
fn find_guard_arith_op(s: &str, ops: &[char]) -> Option<usize> {
    let mut last = None;
    // Most recent non-whitespace byte seen before the current one.
    let mut prev: Option<u8> = None;

    for (i, &b) in s.as_bytes().iter().enumerate() {
        if b.is_ascii_whitespace() {
            continue;
        }

        let follows_operator = matches!(
            prev,
            Some(b'+') | Some(b'-') | Some(b'*') | Some(b'/') | Some(b'%')
        );
        let is_unary_sign = (b == b'+' || b == b'-') && follows_operator;

        if i > 0 && !is_unary_sign && ops.contains(&(b as char)) {
            last = Some(i);
        }
        prev = Some(b);
    }

    last
}
1209
/// Report whether `name` is one of the built-in field type names.
///
/// The comparison is exact and case-sensitive. The recognised names are
/// `u1` through `u8`, `u16`, `u32`, `i8`, `i16`, `i32` and `bool`.
fn is_builtin_type(name: &str) -> bool {
    const BUILTIN_TYPES: [&str; 14] = [
        "u1", "u2", "u3", "u4", "u5", "u6", "u7", "u8", "u16", "u32", "i8", "i16", "i32", "bool",
    ];

    BUILTIN_TYPES.iter().any(|&builtin| builtin == name)
}
1228
1229/// Convert DSL bit positions to hardware (LSB=0) positions with unit support.
1230///
1231/// For variable-length instructions, bit positions beyond width-1 automatically
1232/// refer to subsequent units. The unit index is computed as bit_position / width.
1233///
1234/// For cross-unit ranges (e.g., `\[8:31\]` with width=16), this splits the range into
1235/// multiple BitRange objects, one for each unit spanned.
1236///
1237/// Returns a Vec of BitRange objects ordered by unit index.
1238pub fn dsl_to_hardware(dsl_start: u32, dsl_end: u32, width: u32, order: BitOrder) -> Vec<BitRange> {
1239    let (dsl_lo, dsl_hi) = (dsl_start.min(dsl_end), dsl_start.max(dsl_end));
1240
1241    let start_unit = dsl_lo / width;
1242    let end_unit = dsl_hi / width;
1243
1244    let mut ranges = Vec::new();
1245
1246    for unit in start_unit..=end_unit {
1247        let unit_dsl_start = unit * width;
1248        let unit_dsl_end = (unit + 1) * width - 1;
1249
1250        // Calculate which bits from this unit are included
1251        let range_start_in_unit = if unit == start_unit {
1252            dsl_lo
1253        } else {
1254            unit_dsl_start
1255        };
1256        let range_end_in_unit = if unit == end_unit {
1257            dsl_hi
1258        } else {
1259            unit_dsl_end
1260        };
1261
1262        // Convert to local positions within the unit
1263        let local_start = range_start_in_unit % width;
1264        let local_end = range_end_in_unit % width;
1265
1266        // Convert to hardware notation
1267        let (hw_start, hw_end) = match order {
1268            BitOrder::Msb0 => {
1269                let hw_a = width - 1 - local_start;
1270                let hw_b = width - 1 - local_end;
1271                (std::cmp::max(hw_a, hw_b), std::cmp::min(hw_a, hw_b))
1272            }
1273            BitOrder::Lsb0 => (
1274                std::cmp::max(local_start, local_end),
1275                std::cmp::min(local_start, local_end),
1276            ),
1277        };
1278
1279        ranges.push(BitRange::new_in_unit(unit, hw_start, hw_end));
1280    }
1281
1282    ranges
1283}