Skip to main content

ought_spec/
parser.rs

1use std::collections::hash_map::DefaultHasher;
2use std::hash::{Hash, Hasher};
3use std::path::{Path, PathBuf};
4use std::time::Duration;
5
6use pulldown_cmark::{Event, HeadingLevel, Options, Parser as MdParser, Tag, TagEnd};
7
8use crate::types::{
9    Clause, ClauseId, Keyword, Metadata, ParseError, Section, SourceLocation, Spec, SpecRef,
10    Temporal,
11};
12
/// Parses `.ought.md` files into structured spec IR.
///
/// Pure Rust, no LLM dependency. Recognizes CommonMark markdown,
/// extracts metadata, identifies bold deontic keywords, handles
/// GIVEN nesting and OTHERWISE chains, and parses MUST BY durations.
///
/// This is a field-less unit type: it carries no configuration or state and
/// serves only as a namespace for the associated parsing functions.
pub struct Parser;
19
20impl Parser {
21    /// Parse a spec file from disk.
22    pub fn parse_file(path: &Path) -> Result<Spec, Vec<ParseError>> {
23        let content = std::fs::read_to_string(path).map_err(|e| {
24            vec![ParseError {
25                file: path.to_path_buf(),
26                line: 0,
27                message: format!("failed to read file: {}", e),
28            }]
29        })?;
30        Self::parse_string(&content, path)
31    }
32
33    /// Parse a spec from a string (for testing or programmatic use).
34    pub fn parse_string(content: &str, path: &Path) -> Result<Spec, Vec<ParseError>> {
35        let mut state = ParseState::new(path.to_path_buf(), content);
36        state.parse();
37
38        if state.errors.is_empty() {
39            Ok(state.into_spec())
40        } else if state.spec_name.is_some() {
41            // We have errors but also partial results — return the spec
42            // The spec says to continue after non-fatal errors.
43            // But the signature returns Result, so if there are errors we return them.
44            Err(state.errors)
45        } else {
46            Err(state.errors)
47        }
48    }
49}
50
/// Compute a line number (1-based) from byte offset in the source text.
///
/// The result is one plus the number of `\n` bytes that precede `offset`.
/// Offsets beyond the end of `source` are clamped to its length. The count is
/// taken over raw bytes rather than a `str` slice, so an offset that lands in
/// the middle of a multi-byte UTF-8 character is handled instead of panicking.
fn line_number_at_offset(source: &str, offset: usize) -> usize {
    let end = offset.min(source.len());
    let newlines_before = source.as_bytes()[..end]
        .iter()
        .filter(|&&byte| byte == b'\n')
        .count();
    newlines_before + 1
}
56
/// Slugify a string: lowercase, replace non-alphanumeric with underscore, collapse runs.
///
/// Details of the transformation:
/// - Characters for which `char::is_alphanumeric` is true are kept (this
///   includes non-ASCII letters and digits); only ASCII letters are lowercased.
/// - Every run of other characters becomes a single `_`; leading and trailing
///   underscores are removed.
/// - The result is capped at 60 bytes. The cut is moved back to the nearest
///   character boundary so a multi-byte character is never split (which would
///   make `String::truncate` panic).
fn slugify(s: &str) -> String {
    const MAX_LEN: usize = 60;

    let mut slug = String::new();
    let mut last_was_sep = true; // suppresses a leading underscore
    for c in s.chars() {
        if c.is_alphanumeric() {
            slug.push(c.to_ascii_lowercase());
            last_was_sep = false;
        } else if !last_was_sep {
            slug.push('_');
            last_was_sep = true;
        }
    }

    if slug.len() > MAX_LEN {
        // Index 0 is always a boundary, so this loop terminates.
        let mut cut = MAX_LEN;
        while !slug.is_char_boundary(cut) {
            cut -= 1;
        }
        slug.truncate(cut);
    }

    // Drop any underscore left at the end, whether it came from the input's
    // tail or was exposed by the truncation above.
    let kept = slug.trim_end_matches('_').len();
    slug.truncate(kept);
    slug
}
83
84/// Generate a content hash from keyword, text, and condition.
85fn content_hash(keyword: Keyword, text: &str, condition: &Option<String>) -> String {
86    let mut hasher = DefaultHasher::new();
87    format!("{:?}", keyword).hash(&mut hasher);
88    text.hash(&mut hasher);
89    condition.hash(&mut hasher);
90    format!("{:016x}", hasher.finish())
91}
92
93/// Try to parse a keyword from bold text. Returns (Keyword, Option<Duration>) for MUST BY.
94fn parse_keyword(bold_text: &str) -> Option<(Keyword, Option<Duration>)> {
95    let upper = bold_text.trim().to_uppercase();
96    match upper.as_str() {
97        "MUST" => Some((Keyword::Must, None)),
98        "MUST NOT" => Some((Keyword::MustNot, None)),
99        "SHOULD" => Some((Keyword::Should, None)),
100        "SHOULD NOT" => Some((Keyword::ShouldNot, None)),
101        "MAY" => Some((Keyword::May, None)),
102        "WONT" => Some((Keyword::Wont, None)),
103        "GIVEN" => Some((Keyword::Given, None)),
104        "OTHERWISE" => Some((Keyword::Otherwise, None)),
105        "MUST ALWAYS" => Some((Keyword::MustAlways, None)),
106        _ => {
107            // Check for MUST BY <duration>
108            if upper.starts_with("MUST BY") {
109                let after_must_by = bold_text.trim()[7..].trim();
110                if after_must_by.is_empty() {
111                    // "MUST BY" with no duration — return keyword but no duration
112                    // so the caller can report a parse error
113                    return Some((Keyword::MustBy, None));
114                }
115                if let Some(dur) = parse_duration(after_must_by) {
116                    return Some((Keyword::MustBy, Some(dur)));
117                }
118                // Has text after MUST BY but it's not a valid duration — still
119                // return the keyword so the caller can error
120                return Some((Keyword::MustBy, None));
121            }
122            None
123        }
124    }
125}
126
/// Parse a duration string like "200ms", "5s", "30m".
///
/// Accepted forms are an unsigned integer followed by one of the suffixes
/// `ms` (milliseconds), `s` (seconds) or `m` (minutes). Surrounding
/// whitespace, and whitespace between the number and the suffix, is ignored.
///
/// Returns `None` when the suffix is unknown, the number does not parse as a
/// `u64`, or a minute count is too large to express in whole seconds (the
/// multiplication is checked rather than allowed to overflow).
fn parse_duration(s: &str) -> Option<Duration> {
    let s = s.trim();
    // "ms" has to be tried first: it also ends in 's', and stripping only
    // that would leave a trailing 'm' in the numeric part.
    if let Some(number) = s.strip_suffix("ms") {
        number.trim().parse::<u64>().ok().map(Duration::from_millis)
    } else if let Some(number) = s.strip_suffix('m') {
        let minutes = number.trim().parse::<u64>().ok()?;
        minutes.checked_mul(60).map(Duration::from_secs)
    } else if let Some(number) = s.strip_suffix('s') {
        number.trim().parse::<u64>().ok().map(Duration::from_secs)
    } else {
        None
    }
}
143
144// ─── Parse state machine ───────────────────────────────────────────────────
145
/// Saved state for one list-item level on the item stack.
///
/// A frame is pushed when a list item starts and popped when it ends; while
/// it is the innermost frame it receives the item's text and keyword.
#[derive(Debug)]
struct ItemFrame {
    /// Text accumulated for this item: plain text, inline code (re-wrapped in
    /// backticks) and non-keyword bold spans (re-wrapped in `**`). Line breaks
    /// inside the item are recorded as a single space.
    text: String,
    /// Keyword (and optional MUST BY duration) parsed from a bold span of this
    /// item, or `None` if no bold span has parsed as a keyword.
    keyword: Option<(Keyword, Option<Duration>)>,
    /// Set once a bold span has been accepted as the keyword; every later bold
    /// span is appended to `text`. It stays `false` after a non-keyword bold
    /// span, so a subsequent bold span can still become the keyword.
    keyword_consumed: bool,
    /// 1-based source line computed from the byte offset of the item's start event.
    line: usize,
    /// Nested items collected while this frame is active (from child list items).
    nested_items: Vec<PendingItem>,
}
156
/// Mutable state threaded through one parse of a spec document.
///
/// The markdown event stream is applied to this struct one event at a time;
/// text is routed into whichever buffer matches the current context, and
/// completed list items and sections are accumulated until they are flushed.
struct ParseState {
    // Path of the document; copied into every ParseError that is recorded.
    file: PathBuf,
    // Full source text; used for raw metadata extraction, as markdown parser
    // input, and to turn byte offsets into line numbers.
    source: String,
    // Errors recorded so far. Parsing continues after an error is pushed.
    errors: Vec<ParseError>,

    // Result accumulation
    spec_name: Option<String>,
    metadata: Metadata,
    sections: Vec<Section>,

    // Section stack: (depth, section) — we flatten at the end
    section_stack: Vec<(u8, Section)>,

    // State tracking
    // Level of the heading currently being read, if any, and its text so far.
    in_heading: Option<HeadingLevel>,
    heading_text: String,
    // Whether we are inside a bold span, and the text collected for it.
    in_strong: bool,
    strong_text: String,

    // Item stack: each Start(Item) pushes a frame; End(Item) pops it
    item_stack: Vec<ItemFrame>,

    list_depth: usize, // 0 = not in list, 1 = top-level list, 2 = nested, etc.
    // NOTE(review): within the visible code this flag is only ever assigned
    // `false`, so the event-driven metadata path (in_metadata_paragraph and
    // the metadata link fields below) looks inactive — confirm against the
    // rest of the file.
    metadata_region: bool, // between H1 and first H2
    // Prose collected outside of clause items, awaiting flush into a section.
    prose_buf: String,
    just_finished_clause: bool, // to capture following code blocks as hints
    // Whether we are inside a code block, and the text collected for it.
    in_code_block: bool,
    code_block_text: String,

    // Track current byte offset for line number estimation
    // (set to the start of each event's source range before it is handled).
    current_offset: usize,

    // For metadata, track paragraph text between H1 and first H2
    metadata_paragraph_text: String,
    in_metadata_paragraph: bool,

    // For metadata link parsing: when we're inside a Link in metadata paragraph
    in_metadata_link: bool,
    metadata_link_url: String,
    metadata_link_label: String,

    // Track item nesting to handle GIVEN and OTHERWISE
    // depth-1 items: top-level clauses or GIVEN
    // depth-2 items: nested under GIVEN or OTHERWISE children
    // Holds finished top-level items until they are flushed into a section;
    // deeper items live in their parent's `nested_items`.
    depth1_items: Vec<PendingItem>,
}
203
/// A finished keyword-bearing list item that has not yet been converted into
/// clauses. Items form a tree through `nested_items`.
#[derive(Debug, Clone)]
struct PendingItem {
    /// Keyword recognised in the item's bold span.
    keyword: Keyword,
    /// Trimmed item text with the keyword span removed.
    text: String,
    /// Temporal qualifier derived from the keyword: an invariant for
    /// MUST ALWAYS, a deadline for MUST BY, otherwise `None`.
    temporal: Option<Temporal>,
    /// 1-based source line on which the list item starts.
    line: usize,
    /// Keyword-bearing items that were nested directly under this one.
    nested_items: Vec<PendingItem>,
    /// Contents of code blocks attached to this item as hints.
    hints: Vec<String>,
}
213
214impl ParseState {
215    fn new(file: PathBuf, source: &str) -> Self {
216        Self {
217            file,
218            source: source.to_string(),
219            errors: Vec::new(),
220            spec_name: None,
221            metadata: Metadata::default(),
222            sections: Vec::new(),
223            section_stack: Vec::new(),
224            in_heading: None,
225            heading_text: String::new(),
226            in_strong: false,
227            strong_text: String::new(),
228            item_stack: Vec::new(),
229            list_depth: 0,
230            metadata_region: false,
231            prose_buf: String::new(),
232            just_finished_clause: false,
233            in_code_block: false,
234            code_block_text: String::new(),
235            current_offset: 0,
236            metadata_paragraph_text: String::new(),
237            in_metadata_paragraph: false,
238            in_metadata_link: false,
239            metadata_link_url: String::new(),
240            metadata_link_label: String::new(),
241            depth1_items: Vec::new(),
242        }
243    }
244
245    /// Are we currently inside a list item (at any depth)?
246    fn in_list_item(&self) -> bool {
247        !self.item_stack.is_empty()
248    }
249
250    /// Get the current (innermost) item frame mutably.
251    fn current_item_mut(&mut self) -> Option<&mut ItemFrame> {
252        self.item_stack.last_mut()
253    }
254
255    fn parse(&mut self) {
256        // Pre-extract metadata from raw source text before markdown parsing.
257        // This avoids pulldown-cmark interpreting `**` in paths (like `src/**/*.rs`)
258        // as bold markers.
259        self.extract_raw_metadata();
260
261        let source = self.source.clone();
262        // Collect events with their offsets
263        let events: Vec<(Event<'_>, std::ops::Range<usize>)> =
264            MdParser::new_ext(&source, Options::empty())
265                .into_offset_iter()
266                .collect();
267
268        for (event, range) in events {
269            self.current_offset = range.start;
270            self.handle_event(event);
271        }
272
273        // Flush any remaining items/sections
274        self.flush_pending_items();
275        self.flush_section_stack();
276    }
277
278    /// Extract metadata (context:, source:, schema:, requires:) from raw source text.
279    /// This runs before markdown parsing to avoid pulldown-cmark mangling glob patterns.
280    fn extract_raw_metadata(&mut self) {
281        let source = self.source.clone();
282        let mut in_metadata = false;
283
284        for line in source.lines() {
285            let trimmed = line.trim();
286
287            // Look for H1 to start metadata region
288            if !in_metadata {
289                if trimmed.starts_with("# ") {
290                    in_metadata = true;
291                    if let Some(rest) = trimmed.strip_prefix("# ") {
292                        self.spec_name = Some(rest.trim().to_string());
293                    }
294                }
295                continue;
296            }
297
298            // H2+ ends metadata region
299            if trimmed.starts_with("## ") || trimmed.starts_with("### ") {
300                break;
301            }
302
303            // Parse metadata lines from raw text
304            if let Some(rest) = trimmed.strip_prefix("context:") {
305                let val = rest.trim();
306                if !val.is_empty() {
307                    self.metadata.context = Some(val.to_string());
308                }
309            } else if let Some(rest) = trimmed.strip_prefix("source:") {
310                let val = rest.trim();
311                if !val.is_empty() {
312                    for s in split_metadata_values(val) {
313                        self.metadata.sources.push(s);
314                    }
315                }
316            } else if let Some(rest) = trimmed.strip_prefix("schema:") {
317                let val = rest.trim();
318                if !val.is_empty() {
319                    for s in split_metadata_values(val) {
320                        self.metadata.schemas.push(s);
321                    }
322                }
323            } else if let Some(rest) = trimmed.strip_prefix("requires:") {
324                let val = rest.trim();
325                if !val.is_empty() {
326                    let refs = parse_requires_line(val);
327                    if refs.is_empty() {
328                        self.metadata.requires.push(SpecRef {
329                            label: val.to_string(),
330                            path: PathBuf::from(val),
331                            anchor: None,
332                        });
333                    } else {
334                        self.metadata.requires.extend(refs);
335                    }
336                }
337            }
338        }
339    }
340
    /// Apply a single pulldown-cmark event to the parse state.
    ///
    /// Summary of what each event group does:
    /// - Headings: H1 supplies the spec name if none is set yet; any other
    ///   level flushes pending items and prose, closes sections of the same or
    ///   deeper depth, and opens a new section.
    /// - Bold spans: inside a list item the first span that parses as a
    ///   keyword becomes the item's keyword; other bold text is re-emitted
    ///   wrapped in `**` into the active buffer.
    /// - Lists and items: each item gets a frame on `item_stack`; when it ends
    ///   it is validated and stored either under its parent frame or in
    ///   `depth1_items`, or appended to prose when it has no keyword.
    /// - Code blocks: attached as a hint when a clause has just finished,
    ///   otherwise appended to prose inside a fence.
    /// - Paragraphs, links, inline code, text and line breaks: routed to the
    ///   heading, list item, metadata paragraph or prose buffer that is active.
    ///
    /// Events not listed are ignored.
    fn handle_event(&mut self, event: Event<'_>) {
        match event {
            Event::Start(Tag::Heading { level, .. }) => {
                self.in_heading = Some(level);
                self.heading_text.clear();
            }
            Event::End(TagEnd::Heading(level)) => {
                let title = std::mem::take(&mut self.heading_text).trim().to_string();
                self.in_heading = None;

                match level {
                    HeadingLevel::H1 => {
                        if self.spec_name.is_none() {
                            self.spec_name = Some(title);
                        }
                        // Metadata is now extracted in extract_raw_metadata(),
                        // so we don't enable metadata_region for event-based parsing.
                    }
                    _ => {
                        self.metadata_region = false;
                        // Flush pending items before starting new section
                        self.flush_pending_items();
                        // Flush prose to current section
                        self.flush_prose();

                        // H1 is handled by the outer arm and cannot reach
                        // here; it is listed only to keep the match exhaustive.
                        let depth = match level {
                            HeadingLevel::H1 => 1,
                            HeadingLevel::H2 => 2,
                            HeadingLevel::H3 => 3,
                            HeadingLevel::H4 => 4,
                            HeadingLevel::H5 => 5,
                            HeadingLevel::H6 => 6,
                        };

                        let section = Section {
                            title: title.clone(),
                            depth,
                            prose: String::new(),
                            clauses: Vec::new(),
                            subsections: Vec::new(),
                        };

                        // Pop sections from stack that are at same or deeper depth
                        // and attach each to its parent, or to the top-level
                        // list when the stack becomes empty.
                        while let Some((d, _)) = self.section_stack.last() {
                            if *d >= depth {
                                let (_, popped) = self.section_stack.pop().unwrap();
                                if let Some((_, parent)) = self.section_stack.last_mut() {
                                    parent.subsections.push(popped);
                                } else {
                                    self.sections.push(popped);
                                }
                            } else {
                                break;
                            }
                        }

                        self.section_stack.push((depth, section));
                        self.just_finished_clause = false;
                    }
                }
            }

            Event::Start(Tag::Strong) => {
                self.in_strong = true;
                self.strong_text.clear();
            }
            Event::End(TagEnd::Strong) => {
                self.in_strong = false;
                let bold_text = std::mem::take(&mut self.strong_text);

                if self.in_heading.is_some() {
                    // Bold inside a heading contributes its text without markers.
                    self.heading_text.push_str(&bold_text);
                } else if let Some(frame) = self.current_item_mut() {
                    if !frame.keyword_consumed {
                        // Try to parse as a keyword
                        if let Some(kw) = parse_keyword(&bold_text) {
                            frame.keyword = Some(kw);
                            frame.keyword_consumed = true;
                        } else {
                            // Not a keyword, just bold text in list item
                            frame.text.push_str("**");
                            frame.text.push_str(&bold_text);
                            frame.text.push_str("**");
                        }
                    } else {
                        // Already have keyword, this bold text is part of clause text
                        frame.text.push_str("**");
                        frame.text.push_str(&bold_text);
                        frame.text.push_str("**");
                    }
                } else if self.in_metadata_paragraph {
                    self.metadata_paragraph_text.push_str("**");
                    self.metadata_paragraph_text.push_str(&bold_text);
                    self.metadata_paragraph_text.push_str("**");
                } else {
                    // Bold in prose
                    self.prose_buf.push_str("**");
                    self.prose_buf.push_str(&bold_text);
                    self.prose_buf.push_str("**");
                }
            }

            Event::Start(Tag::List(_)) => {
                self.list_depth += 1;
            }
            Event::End(TagEnd::List(_)) => {
                if self.list_depth == 1 {
                    // End of top-level list: flush all pending items
                    self.flush_pending_items();
                    // Keep just_finished_clause alive so code blocks after the list
                    // can be captured as hints
                }
                if self.list_depth > 0 {
                    self.list_depth -= 1;
                }
            }

            Event::Start(Tag::Item) => {
                let line = line_number_at_offset(&self.source, self.current_offset);
                self.item_stack.push(ItemFrame {
                    text: String::new(),
                    keyword: None,
                    keyword_consumed: false,
                    line,
                    nested_items: Vec::new(),
                });
            }
            Event::End(TagEnd::Item) => {
                // The frame is popped first, so below `item_stack.last()` is
                // this item's parent and an empty stack means top level.
                if let Some(frame) = self.item_stack.pop() {
                    let text = frame.text.trim().to_string();
                    let keyword = frame.keyword;
                    let line = frame.line;
                    let nested_items = frame.nested_items;

                    if let Some((kw, dur)) = keyword {
                        // Validate MUST BY has a duration
                        if kw == Keyword::MustBy && dur.is_none() {
                            self.errors.push(ParseError {
                                file: self.file.clone(),
                                line,
                                message: "MUST BY requires a duration (e.g. MUST BY 200ms, MUST BY 5s)".to_string(),
                            });
                            // Don't produce a clause for this — it's a parse error
                        }

                        // Validate OTHERWISE is nested under a parent obligation
                        if kw == Keyword::Otherwise && self.item_stack.is_empty() {
                            self.errors.push(ParseError {
                                file: self.file.clone(),
                                line,
                                message: "OTHERWISE must be nested under a parent obligation (MUST, SHOULD, etc.), not at the top level".to_string(),
                            });
                        }

                        // Validate OTHERWISE is not under MAY, WONT, or GIVEN
                        if kw == Keyword::Otherwise
                            && let Some(parent_frame) = self.item_stack.last()
                            && let Some((parent_kw, _)) = &parent_frame.keyword
                            && matches!(parent_kw, Keyword::May | Keyword::Wont | Keyword::Given)
                        {
                            self.errors.push(ParseError {
                                file: self.file.clone(),
                                line,
                                message: format!(
                                    "OTHERWISE cannot be nested under {} — only under obligations that can be violated (MUST, SHOULD, etc.)",
                                    match parent_kw {
                                        Keyword::May => "MAY",
                                        Keyword::Wont => "WONT",
                                        Keyword::Given => "GIVEN",
                                        // Guarded by the `matches!` above.
                                        _ => unreachable!(),
                                    }
                                ),
                            });
                        }

                        let temporal = match kw {
                            Keyword::MustAlways => Some(Temporal::Invariant),
                            Keyword::MustBy => dur.map(Temporal::Deadline),
                            _ => None,
                        };

                        // Skip creating the item if it was an invalid MUST BY
                        // (an invalid OTHERWISE, by contrast, is still stored
                        // after its error has been recorded).
                        if kw == Keyword::MustBy && dur.is_none() {
                            // error already recorded above
                        } else {
                        let item = PendingItem {
                            keyword: kw,
                            text,
                            temporal,
                            line,
                            nested_items,
                            hints: Vec::new(),
                        };

                        // Determine nesting: if item_stack is empty, this is a top-level item.
                        // If item_stack is non-empty, this is nested under the parent frame.
                        if let Some(parent_frame) = self.item_stack.last_mut() {
                            parent_frame.nested_items.push(item);
                        } else {
                            // Top-level item
                            self.depth1_items.push(item);
                        }
                        }
                        // NOTE(review): this is also set when the item was
                        // rejected as an invalid MUST BY, so a code block that
                        // follows may attach to an earlier clause — confirm
                        // this is intended.
                        self.just_finished_clause = true;
                    } else {
                        // Non-clause list item — treat as prose
                        // NOTE(review): `nested_items` collected under a
                        // keyword-less item is not used on this path, so
                        // clauses nested beneath a plain bullet are dropped —
                        // confirm this is intended.
                        if !text.is_empty() {
                            self.prose_buf.push_str("- ");
                            self.prose_buf.push_str(&text);
                            self.prose_buf.push('\n');
                        }
                        self.just_finished_clause = false;
                    }
                }
            }

            Event::Start(Tag::CodeBlock(_)) => {
                self.in_code_block = true;
                self.code_block_text.clear();
            }
            Event::End(TagEnd::CodeBlock) => {
                self.in_code_block = false;
                let code = std::mem::take(&mut self.code_block_text);
                if self.just_finished_clause && !code.trim().is_empty() {
                    // Attach to the most recent clause as a hint.
                    // If pending items exist, attach to last one.
                    // Only one level of nesting is inspected: the hint goes to
                    // the last direct child of the last top-level item when
                    // one exists, otherwise to that top-level item itself.
                    if let Some(last) = self.depth1_items.last_mut() {
                        if let Some(nested_last) = last.nested_items.last_mut() {
                            nested_last.hints.push(code);
                        } else {
                            last.hints.push(code);
                        }
                    } else {
                        // Items already flushed — attach to last clause in current section
                        self.attach_hint_to_last_clause(code);
                    }
                } else {
                    // Code block as prose
                    self.prose_buf.push_str("```\n");
                    self.prose_buf.push_str(&code);
                    self.prose_buf.push_str("```\n");
                }
            }

            Event::Start(Tag::Paragraph) => {
                // NOTE(review): `metadata_region` is never set to `true` in the
                // visible code, so this branch looks inactive — confirm.
                if self.metadata_region && !self.in_list_item() {
                    self.in_metadata_paragraph = true;
                    self.metadata_paragraph_text.clear();
                }
            }
            Event::End(TagEnd::Paragraph) => {
                if self.in_metadata_paragraph {
                    self.in_metadata_paragraph = false;
                    let para = std::mem::take(&mut self.metadata_paragraph_text);
                    self.parse_metadata_block(&para);
                } else if !self.in_list_item() {
                    // Separate consecutive prose paragraphs with a newline.
                    self.prose_buf.push('\n');
                }
            }

            // Handle links — important for metadata `requires:` parsing
            // Outside a metadata paragraph the link events themselves are
            // ignored; only the text events between them are recorded.
            Event::Start(Tag::Link { dest_url, .. }) => {
                if self.in_metadata_paragraph {
                    self.in_metadata_link = true;
                    self.metadata_link_url = dest_url.to_string();
                    self.metadata_link_label.clear();
                }
            }
            Event::End(TagEnd::Link) => {
                if self.in_metadata_link {
                    self.in_metadata_link = false;
                    // Reconstruct the markdown link syntax so parse_metadata_block can parse it
                    let label = std::mem::take(&mut self.metadata_link_label);
                    let url = std::mem::take(&mut self.metadata_link_url);
                    self.metadata_paragraph_text
                        .push_str(&format!("[{}]({})", label, url));
                }
            }

            Event::Text(text) => {
                self.handle_text(&text);
            }
            Event::Code(code) => {
                // Inline code is re-wrapped in backticks in whichever buffer
                // is active.
                if self.in_heading.is_some() {
                    self.heading_text.push('`');
                    self.heading_text.push_str(&code);
                    self.heading_text.push('`');
                } else if let Some(frame) = self.current_item_mut() {
                    frame.text.push('`');
                    frame.text.push_str(&code);
                    frame.text.push('`');
                } else if self.in_metadata_paragraph {
                    self.metadata_paragraph_text.push('`');
                    self.metadata_paragraph_text.push_str(&code);
                    self.metadata_paragraph_text.push('`');
                } else {
                    self.prose_buf.push('`');
                    self.prose_buf.push_str(&code);
                    self.prose_buf.push('`');
                }
            }
            Event::SoftBreak | Event::HardBreak => {
                // Headings and list items are kept on one line, so a break
                // becomes a space there; metadata and prose keep the newline.
                if self.in_heading.is_some() {
                    self.heading_text.push(' ');
                } else if self.in_list_item() {
                    if let Some(frame) = self.current_item_mut() {
                        frame.text.push(' ');
                    }
                } else if self.in_metadata_paragraph {
                    self.metadata_paragraph_text.push('\n');
                } else {
                    self.prose_buf.push('\n');
                }
            }

            _ => {}
        }
    }
659
660    fn handle_text(&mut self, text: &str) {
661        if self.in_code_block {
662            self.code_block_text.push_str(text);
663        } else if self.in_strong {
664            self.strong_text.push_str(text);
665        } else if self.in_heading.is_some() {
666            self.heading_text.push_str(text);
667        } else if self.in_metadata_link {
668            // Text inside a link in metadata — capture as label
669            self.metadata_link_label.push_str(text);
670        } else if self.in_list_item() {
671            if let Some(frame) = self.current_item_mut() {
672                frame.text.push_str(text);
673            }
674        } else if self.in_metadata_paragraph {
675            self.metadata_paragraph_text.push_str(text);
676        } else {
677            self.prose_buf.push_str(text);
678        }
679    }
680
681    fn parse_metadata_block(&mut self, text: &str) {
682        for line in text.lines() {
683            let trimmed = line.trim();
684            if trimmed.is_empty() {
685                continue;
686            }
687            if let Some(rest) = trimmed.strip_prefix("context:") {
688                let val = rest.trim();
689                if !val.is_empty() {
690                    self.metadata.context = Some(val.to_string());
691                }
692            } else if let Some(rest) = trimmed.strip_prefix("source:") {
693                let val = rest.trim();
694                if !val.is_empty() {
695                    for s in split_metadata_values(val) {
696                        self.metadata.sources.push(s);
697                    }
698                }
699            } else if let Some(rest) = trimmed.strip_prefix("schema:") {
700                let val = rest.trim();
701                if !val.is_empty() {
702                    for s in split_metadata_values(val) {
703                        self.metadata.schemas.push(s);
704                    }
705                }
706            } else if let Some(rest) = trimmed.strip_prefix("requires:") {
707                let val = rest.trim();
708                if !val.is_empty() {
709                    let refs = parse_requires_line(val);
710                    if refs.is_empty() {
711                        // Plain text path (no markdown link syntax)
712                        self.metadata.requires.push(SpecRef {
713                            label: val.to_string(),
714                            path: PathBuf::from(val),
715                            anchor: None,
716                        });
717                    } else {
718                        self.metadata.requires.extend(refs);
719                    }
720                }
721            }
722            // Lines that don't match any metadata prefix are ignored as free-form text
723        }
724    }
725
726    fn flush_prose(&mut self) {
727        let prose = std::mem::take(&mut self.prose_buf).trim().to_string();
728        if !prose.is_empty()
729            && let Some((_, section)) = self.section_stack.last_mut() {
730                if section.prose.is_empty() {
731                    section.prose = prose;
732                } else {
733                    section.prose.push('\n');
734                    section.prose.push_str(&prose);
735                }
736            }
737    }
738
739    fn flush_pending_items(&mut self) {
740        let items = std::mem::take(&mut self.depth1_items);
741        if items.is_empty() {
742            return;
743        }
744
745        let spec_name = match &self.spec_name {
746            Some(n) => slugify(n),
747            None => "unknown".to_string(),
748        };
749
750        let section_path = self
751            .section_stack
752            .iter()
753            .map(|(_, s)| slugify(&s.title))
754            .collect::<Vec<_>>();
755
756        for item in items {
757            let clauses = self.items_to_clauses(&spec_name, &section_path, item, None);
758            if let Some((_, section)) = self.section_stack.last_mut() {
759                section.clauses.extend(clauses);
760            }
761        }
762
763        // Note: we intentionally do NOT reset just_finished_clause here,
764        // so that a code block right after the list can still be captured as a hint.
765    }
766
767    fn items_to_clauses(
768        &self,
769        spec_name: &str,
770        section_path: &[String],
771        item: PendingItem,
772        given_condition: Option<String>,
773    ) -> Vec<Clause> {
774        let mut result = Vec::new();
775
776        match item.keyword {
777            Keyword::Given => {
778                // GIVEN is a grouping construct. Its text is the condition.
779                // All nested items inherit this condition.
780                let condition = Some(item.text.clone());
781                for nested in item.nested_items {
782                    let nested_clauses =
783                        self.items_to_clauses(spec_name, section_path, nested, condition.clone());
784                    result.extend(nested_clauses);
785                }
786            }
787            _ => {
788                // Build the clause
789                let condition = given_condition;
790                let keyword_slug = match item.keyword {
791                    Keyword::Must => "must",
792                    Keyword::MustNot => "must_not",
793                    Keyword::Should => "should",
794                    Keyword::ShouldNot => "should_not",
795                    Keyword::May => "may",
796                    Keyword::Wont => "wont",
797                    Keyword::MustAlways => "must_always",
798                    Keyword::MustBy => "must_by",
799                    Keyword::Otherwise => "otherwise",
800                    Keyword::Given => unreachable!(),
801                };
802
803                let text_slug = slugify(&item.text);
804                let summary = if text_slug.is_empty() {
805                    keyword_slug.to_string()
806                } else {
807                    format!("{}_{}", keyword_slug, text_slug)
808                };
809
810                // Build ID: spec_name::section1::section2::keyword_summary
811                let mut id_parts: Vec<&str> = Vec::new();
812                id_parts.push(spec_name);
813                for sp in section_path {
814                    id_parts.push(sp);
815                }
816                id_parts.push(&summary);
817                let id_str = id_parts.join("::");
818
819                // Truncate if too long
820                let id_str = if id_str.len() > 120 {
821                    let mut s = id_str[..120].to_string();
822                    while s.ends_with('_') || s.ends_with(':') {
823                        s.pop();
824                    }
825                    s
826                } else {
827                    id_str
828                };
829
830                let hash = content_hash(item.keyword, &item.text, &condition);
831
832                // Build otherwise clauses from nested items that are OTHERWISE
833                let mut otherwise_clauses = Vec::new();
834                let mut other_nested = Vec::new();
835
836                for nested in item.nested_items {
837                    if nested.keyword == Keyword::Otherwise {
838                        // Build otherwise clause
839                        let ow_summary = format!("otherwise_{}", slugify(&nested.text));
840                        let mut ow_id_parts: Vec<&str> = Vec::new();
841                        ow_id_parts.push(spec_name);
842                        for sp in section_path {
843                            ow_id_parts.push(sp);
844                        }
845                        ow_id_parts.push(&ow_summary);
846                        let ow_id_str = ow_id_parts.join("::");
847
848                        let ow_hash = content_hash(Keyword::Otherwise, &nested.text, &condition);
849
850                        otherwise_clauses.push(Clause {
851                            id: ClauseId(ow_id_str),
852                            keyword: Keyword::Otherwise,
853                            severity: item.keyword.severity(), // inherit parent severity
854                            text: nested.text,
855                            condition: condition.clone(),
856                            otherwise: Vec::new(),
857                            temporal: None,
858                            hints: nested.hints,
859                            source_location: SourceLocation {
860                                file: self.file.clone(),
861                                line: nested.line,
862                            },
863                            content_hash: ow_hash,
864                        });
865                    } else {
866                        other_nested.push(nested);
867                    }
868                }
869
870                let clause = Clause {
871                    id: ClauseId(id_str),
872                    keyword: item.keyword,
873                    severity: item.keyword.severity(),
874                    text: item.text,
875                    condition,
876                    otherwise: otherwise_clauses,
877                    temporal: item.temporal,
878                    hints: item.hints,
879                    source_location: SourceLocation {
880                        file: self.file.clone(),
881                        line: item.line,
882                    },
883                    content_hash: hash,
884                };
885
886                result.push(clause);
887
888                // Any non-OTHERWISE nested items get turned into clauses too
889                // (rare case, but handle gracefully)
890                for nested in other_nested {
891                    let nested_clauses =
892                        self.items_to_clauses(spec_name, section_path, nested, None);
893                    result.extend(nested_clauses);
894                }
895            }
896        }
897
898        result
899    }
900
901    fn attach_hint_to_last_clause(&mut self, code: String) {
902        if let Some((_, section)) = self.section_stack.last_mut()
903            && let Some(clause) = section.clauses.last_mut() {
904                clause.hints.push(code);
905            }
906    }
907
908    fn flush_section_stack(&mut self) {
909        // Flush any remaining prose
910        self.flush_prose();
911
912        // Pop all sections from the stack
913        while let Some((_, section)) = self.section_stack.pop() {
914            if let Some((_, parent)) = self.section_stack.last_mut() {
915                parent.subsections.push(section);
916            } else {
917                self.sections.push(section);
918            }
919        }
920    }
921
922    fn into_spec(self) -> Spec {
923        Spec {
924            name: self.spec_name.unwrap_or_else(|| "Untitled".to_string()),
925            metadata: self.metadata,
926            sections: self.sections,
927            source_path: self.file,
928        }
929    }
930}
931
/// Split a comma-separated metadata value into its individual entries.
///
/// Commas that appear inside a `{...}` group are not treated as separators, so
/// glob patterns using brace alternation (e.g. `src/**/*.{rs,toml}`) survive
/// intact. Each entry is trimmed and empty entries are dropped.
///
/// If the braces in `val` are unbalanced (an opening `{` is never closed), the
/// value is split on every comma instead, so a stray brace cannot swallow the
/// rest of the list.
fn split_metadata_values(val: &str) -> Vec<String> {
    let mut pieces: Vec<&str> = Vec::new();
    let mut brace_depth = 0usize;
    let mut piece_start = 0usize;

    for (idx, ch) in val.char_indices() {
        match ch {
            '{' => brace_depth += 1,
            // A stray closing brace must not push the depth below zero.
            '}' => brace_depth = brace_depth.saturating_sub(1),
            ',' if brace_depth == 0 => {
                pieces.push(&val[piece_start..idx]);
                // ',' is one byte, so the next piece starts right after it.
                piece_start = idx + 1;
            }
            _ => {}
        }
    }

    if brace_depth == 0 {
        pieces.push(&val[piece_start..]);
    } else {
        // Unbalanced braces: fall back to treating every comma as a separator.
        pieces = val.split(',').collect();
    }

    pieces
        .into_iter()
        .map(str::trim)
        .filter(|piece| !piece.is_empty())
        .map(String::from)
        .collect()
}
945
946/// Parse `requires:` value containing markdown links like `[label](path.ought.md)` and
947/// `[label](path.ought.md#anchor)`.
948fn parse_requires_line(line: &str) -> Vec<SpecRef> {
949    let mut refs = Vec::new();
950    let mut rest = line;
951    while let Some(open_bracket) = rest.find('[') {
952        rest = &rest[open_bracket..];
953        // Find ](
954        if let Some(bracket_paren) = rest.find("](") {
955            let label = &rest[1..bracket_paren];
956            let after_paren = &rest[bracket_paren + 2..];
957            if let Some(close_paren) = after_paren.find(')') {
958                let url = &after_paren[..close_paren];
959                let (path_str, anchor) = if let Some(hash_pos) = url.find('#') {
960                    (&url[..hash_pos], Some(url[hash_pos + 1..].to_string()))
961                } else {
962                    (url, None)
963                };
964                refs.push(SpecRef {
965                    label: label.to_string(),
966                    path: PathBuf::from(path_str),
967                    anchor,
968                });
969                rest = &after_paren[close_paren + 1..];
970            } else {
971                rest = &rest[1..];
972            }
973        } else {
974            rest = &rest[1..];
975        }
976    }
977    refs
978}