Skip to main content

busbar_sf_agentscript/parser/
mod.rs

1//! Parser for AgentScript source code.
2//!
3//! This module provides a complete parser that converts AgentScript source
4//! into a typed Abstract Syntax Tree ([`AgentFile`]).
5//!
6//! # Architecture
7//!
8//! The parser uses a two-phase approach:
9//!
10//! 1. **Lexical analysis** - Source → Tokens (via [`crate::lexer`])
11//! 2. **Parsing** - Tokens → AST (via chumsky combinators)
12//!
13//! # Usage
14//!
15//! ```rust
16//! use busbar_sf_agentscript::parser::parse;
17//!
18//! let source = r#"
19//! config:
20//!    agent_name: "MyAgent"
21//!
22//! topic main:
23//!    description: "Main topic"
24//! "#;
25//!
26//! match parse(source) {
27//!     Ok(agent) => {
28//!         println!("Parsed {} topics", agent.topics.len());
29//!     }
30//!     Err(errors) => {
31//!         for err in errors {
32//!             eprintln!("{}", err);
33//!         }
34//!     }
35//! }
36//! ```
37//!
38//! # Error Handling
39//!
40//! Use [`parse_with_errors()`] for partial parsing that returns both
41//! the result and any errors encountered:
42//!
43//! ```rust
44//! use busbar_sf_agentscript::parser::parse_with_errors;
45//!
46//! let source = "config:\n   agent_name: \"Test\"";
47//! let (result, errors) = parse_with_errors(source);
48//!
49//! if let Some(agent) = result {
50//!     println!("Parsed successfully");
51//! }
52//! for err in errors {
53//!     eprintln!("Warning: {}", err);
54//! }
55//! ```
56//!
57//! # Module Structure
58//!
59//! The parser is split into submodules for each block type:
60//!
61//! - `config` - Config block parsing
62//! - `variables` - Variable declarations
63//! - `system` - System instructions and messages
64//! - `topics` - Topic and start_agent blocks
65//! - `actions` - Action definitions
66//! - `reasoning` - Reasoning blocks
67//! - `expressions` - Expression parsing
68//! - `instructions` - Static and dynamic instructions
69//!
70//! [`AgentFile`]: crate::ast::AgentFile
71
72mod actions;
73mod config;
74mod connections;
75mod directives;
76mod expressions;
77mod instructions;
78mod language;
79mod primitives;
80mod reasoning;
81mod system;
// The `tests` submodule is only needed when building the test harness, so it
// is gated on `cfg(test)` (not `cfg(not(test))`, which would exclude it from
// test builds and compile it into every other build instead).
#[cfg(test)]
mod tests;
84mod topics;
85mod variables;
86
87use crate::ast::AgentFile;
88use crate::lexer;
89
90// Re-export the span type
91pub use primitives::Span;
92
/// Translate a byte offset within `source` into a 1-indexed `(line, column)` pair.
///
/// Every character that starts before `offset` is counted: a `'\n'` moves to
/// the next line and resets the column to 1, any other character advances the
/// column by one. An offset at or past the end of `source` yields the position
/// just after the final character.
fn offset_to_line_col(source: &str, offset: usize) -> (usize, usize) {
    source
        .char_indices()
        .take_while(|&(start, _)| start < offset)
        .fold((1, 1), |(line, col), (_, ch)| match ch {
            '\n' => (line + 1, 1),
            _ => (line, col + 1),
        })
}
110
/// Return the text of line `line_num` (1-indexed) from `source`.
///
/// Lines are split with [`str::lines`], so the line terminator is not part of
/// the result. A `line_num` of 0 is treated like line 1, and a line number
/// past the end of `source` yields the empty string.
fn get_line_content(source: &str, line_num: usize) -> &str {
    let index = line_num.saturating_sub(1);
    match source.lines().nth(index) {
        Some(text) => text,
        None => "",
    }
}
115
116/// Format a chumsky Rich error into a human-readable string with line/column info
117fn format_parse_error<'tokens, 'src>(
118    source: &str,
119    error: &Rich<'tokens, crate::lexer::Token<'src>, primitives::Span>,
120) -> String {
121    let span = error.span();
122    let (line, col) = offset_to_line_col(source, span.start);
123    let line_content = get_line_content(source, line);
124
125    // Build expected string
126    let expected: Vec<String> = error.expected().map(|e| format!("{}", e)).collect();
127    let expected_str = if expected.is_empty() {
128        String::new()
129    } else if expected.len() == 1 {
130        format!(", expected {}", expected[0])
131    } else {
132        format!(", expected one of: {}", expected.join(", "))
133    };
134
135    // Build found string
136    let found_str = match error.found() {
137        Some(tok) => format!("found '{}'", tok),
138        None => "found end of input".to_string(),
139    };
140
141    // Build context chain from .labelled() calls - shows WHERE in the parse tree we failed
142    let contexts: Vec<_> = error.contexts().collect();
143    let context_str = if contexts.is_empty() {
144        String::new()
145    } else {
146        let ctx_labels: Vec<String> = contexts
147            .iter()
148            .map(|(label, ctx_span)| {
149                let (ctx_line, _) = offset_to_line_col(source, ctx_span.start);
150                format!("{} (line {})", label, ctx_line)
151            })
152            .collect();
153        format!("\n  while parsing: {}", ctx_labels.join(" > "))
154    };
155
156    // Format with context
157    format!(
158        "Error at line {}, column {}: {}{}{}\n  |\n{:>3} | {}\n  | {}{}",
159        line,
160        col,
161        found_str,
162        expected_str,
163        context_str,
164        line,
165        line_content,
166        " ".repeat(col.saturating_sub(1)),
167        "^".repeat(
168            (span.end - span.start)
169                .max(1)
170                .min(line_content.len().saturating_sub(col - 1).max(1))
171        )
172    )
173}
174
175/// Format a lexer error into a human-readable string
176fn format_lexer_error(
177    source: &str,
178    error: &impl std::fmt::Debug,
179    span_start: usize,
180    span_end: usize,
181) -> String {
182    let (line, col) = offset_to_line_col(source, span_start);
183    let line_content = get_line_content(source, line);
184
185    format!(
186        "Lexer error at line {}, column {}: {:?}\n  |\n{:>3} | {}\n  | {}{}",
187        line,
188        col,
189        error,
190        line,
191        line_content,
192        " ".repeat(col.saturating_sub(1)),
193        "^".repeat(
194            (span_end - span_start)
195                .max(1)
196                .min(line_content.len().saturating_sub(col - 1).max(1))
197        )
198    )
199}
200
// Import primitives needed by agent_file_parser
202use primitives::{skip_toplevel_noise, ParserInput};
203
204use chumsky::input::Input as _;
205use chumsky::prelude::*;
206use chumsky::recovery::skip_then_retry_until;
207
208use config::config_block;
209use connections::{connection_block, connections_wrapper_block};
210use language::language_block;
211use system::system_block;
212use topics::{start_agent_block, topic_block};
213use variables::variables_block;
214
215/// Parse an AgentScript file from source code.
216///
217/// Returns Ok only if parsing succeeds with no errors.
218/// Use `parse_with_errors` to get partial results and all errors.
219pub fn parse(source: &str) -> Result<AgentFile, Vec<String>> {
220    let (result, errors) = parse_with_errors(source);
221    if errors.is_empty() {
222        result.ok_or_else(|| vec!["Unknown parse error".to_string()])
223    } else {
224        Err(errors)
225    }
226}
227
228/// Parse an AgentScript file from source with full error reporting.
229///
230/// Returns both a partial AST (if recovery succeeded) and ALL errors found.
231/// This allows collecting multiple errors in a single parse pass.
232pub fn parse_with_errors(source: &str) -> (Option<AgentFile>, Vec<String>) {
233    // Phase 1: Lexical analysis with indentation tokens
234    let tokens = match lexer::lex_with_indentation(source) {
235        Ok(tokens) => tokens,
236        Err(errs) => {
237            let errors: Vec<String> = errs
238                .iter()
239                .map(|e| {
240                    let span = e.span();
241                    format_lexer_error(source, &e.reason(), span.start, span.end)
242                })
243                .collect();
244            return (None, errors);
245        }
246    };
247
248    // Phase 2: Parse into AST using token-based parser
249    let eoi_span = primitives::Span::new((), source.len()..source.len());
250    let token_stream = tokens.as_slice().split_token_span(eoi_span);
251
252    // Use into_output_errors to get BOTH partial results AND all errors
253    let (result, errs) = agent_file_parser().parse(token_stream).into_output_errors();
254
255    let errors: Vec<String> = errs.iter().map(|e| format_parse_error(source, e)).collect();
256    (result, errors)
257}
258
259/// Parse an AgentScript file and return structured errors with span information.
260///
261/// Returns Ok only if parsing succeeds with no errors.
262/// Use `parse_with_structured_errors_all` to get partial results and all errors.
263pub fn parse_with_structured_errors(
264    source: &str,
265) -> Result<AgentFile, Vec<crate::error::ParseErrorInfo>> {
266    let (result, errors) = parse_with_structured_errors_all(source);
267    if errors.is_empty() {
268        result.ok_or_else(|| {
269            vec![crate::error::ParseErrorInfo {
270                message: "Unknown parse error".to_string(),
271                span: None,
272                expected: vec![],
273                found: None,
274                contexts: vec![],
275            }]
276        })
277    } else {
278        Err(errors)
279    }
280}
281
282/// Parse an AgentScript file and return structured errors with span information.
283///
284/// Returns both a partial AST (if recovery succeeded) and ALL errors found.
285pub fn parse_with_structured_errors_all(
286    source: &str,
287) -> (Option<AgentFile>, Vec<crate::error::ParseErrorInfo>) {
288    use crate::error::ParseErrorInfo;
289
290    // Phase 1: Lexical analysis with indentation tokens
291    let tokens = match lexer::lex_with_indentation(source) {
292        Ok(tokens) => tokens,
293        Err(errs) => {
294            let errors: Vec<ParseErrorInfo> = errs
295                .iter()
296                .map(|e| {
297                    let span = e.span();
298                    let (line, col) = offset_to_line_col(source, span.start);
299                    ParseErrorInfo {
300                        message: format!(
301                            "Lexer error at line {}, column {}: {}",
302                            line,
303                            col,
304                            e.reason()
305                        ),
306                        span: Some(span.start..span.end),
307                        expected: vec![],
308                        found: None,
309                        contexts: vec![],
310                    }
311                })
312                .collect();
313            return (None, errors);
314        }
315    };
316
317    // Phase 2: Parse into AST using token-based parser
318    let eoi_span = primitives::Span::new((), source.len()..source.len());
319    let token_stream = tokens.as_slice().split_token_span(eoi_span);
320
321    // Use into_output_errors to get BOTH partial results AND all errors
322    let (result, errs) = agent_file_parser().parse(token_stream).into_output_errors();
323
324    let errors: Vec<ParseErrorInfo> = errs
325        .iter()
326        .map(|e| {
327            let span = e.span();
328            let (line, col) = offset_to_line_col(source, span.start);
329            // Collect contexts from labelled parsers
330            let contexts: Vec<(String, std::ops::Range<usize>)> = e
331                .contexts()
332                .map(|(label, ctx_span)| (label.to_string(), ctx_span.start..ctx_span.end))
333                .collect();
334
335            ParseErrorInfo {
336                message: format!("Parse error at line {}, column {}: {}", line, col, e.reason()),
337                span: Some(span.start..span.end),
338                expected: e.expected().map(|exp| format!("{}", exp)).collect(),
339                found: e.found().map(|tok| format!("{}", tok)),
340                contexts,
341            }
342        })
343        .collect();
344
345    (result, errors)
346}
347
348// ============================================================================
349// Top-Level Agent File Parser
350// ============================================================================
351
352use crate::ast::{
353    ConfigBlock, ConnectionBlock, LanguageBlock, Spanned, StartAgentBlock, SystemBlock, TopicBlock,
354    VariablesBlock,
355};
356use crate::lexer::Token;
357
/// Enum for tracking parsed top-level blocks.
///
/// Each variant wraps the output of one of the top-level block parsers tried
/// by `agent_file_parser`, which collects these values and then folds them
/// into a single `AgentFile`.
enum TopLevelBlock {
    /// Output of `config_block()`; stored in `AgentFile::config`.
    Config(Spanned<ConfigBlock>),
    /// Output of `variables_block()`; stored in `AgentFile::variables`.
    Variables(Spanned<VariablesBlock>),
    /// Output of `system_block()`; stored in `AgentFile::system`.
    System(Spanned<SystemBlock>),
    /// Output of `start_agent_block()`; stored in `AgentFile::start_agent`.
    StartAgent(Spanned<StartAgentBlock>),
    /// Output of `topic_block()`; appended to `AgentFile::topics`.
    Topic(Spanned<TopicBlock>),
    /// Output of `language_block()`; stored in `AgentFile::language`.
    Language(Spanned<LanguageBlock>),
    /// Output of `connection_block()`; appended to `AgentFile::connections`.
    Connection(Spanned<ConnectionBlock>),
    /// Multiple connections from a `connections:` wrapper block.
    /// All of them are appended to `AgentFile::connections`.
    Connections(Vec<Spanned<ConnectionBlock>>),
}
370
371/// Parse a complete agent file.
372fn agent_file_parser<'tokens, 'src: 'tokens>() -> impl Parser<
373    'tokens,
374    ParserInput<'tokens, 'src>,
375    AgentFile,
376    extra::Err<Rich<'tokens, Token<'src>, primitives::Span>>,
377> + Clone {
378    // Recovery strategy: when parsing fails, skip until we find a top-level keyword
379    // and retry. This captures errors with proper context from .labelled() calls.
380    let recovery_until = choice((
381        just(Token::Topic).ignored(),
382        just(Token::StartAgent).ignored(),
383        just(Token::Config).ignored(),
384        just(Token::Variables).ignored(),
385        just(Token::System).ignored(),
386        just(Token::Language).ignored(),
387        just(Token::Connection).ignored(),
388        just(Token::Connections).ignored(),
389    ));
390
391    // Parse noise once, then dispatch on the block type.
392    // This avoids redundantly parsing skip_toplevel_noise() for each alternative.
393    skip_toplevel_noise()
394        .ignore_then(choice((
395            config_block().map(TopLevelBlock::Config),
396            variables_block().map(TopLevelBlock::Variables),
397            system_block().map(TopLevelBlock::System),
398            start_agent_block().map(TopLevelBlock::StartAgent),
399            topic_block().map(TopLevelBlock::Topic),
400            language_block().map(TopLevelBlock::Language),
401            connection_block().map(TopLevelBlock::Connection),
402            connections_wrapper_block().map(TopLevelBlock::Connections),
403        )))
404        .recover_with(skip_then_retry_until(any().ignored(), recovery_until))
405        .repeated()
406        .collect::<Vec<_>>()
407        .then_ignore(skip_toplevel_noise())
408        .then_ignore(end())
409        .map(|blocks| {
410            let mut file = AgentFile::default();
411
412            for block in blocks {
413                match block {
414                    TopLevelBlock::Config(c) => file.config = Some(c),
415                    TopLevelBlock::Variables(v) => file.variables = Some(v),
416                    TopLevelBlock::System(s) => file.system = Some(s),
417                    TopLevelBlock::StartAgent(sa) => file.start_agent = Some(sa),
418                    TopLevelBlock::Topic(t) => file.topics.push(t),
419                    TopLevelBlock::Language(l) => file.language = Some(l),
420                    TopLevelBlock::Connection(c) => file.connections.push(c),
421                    TopLevelBlock::Connections(cs) => file.connections.extend(cs),
422                }
423            }
424
425            file
426        })
427}