Skip to main content

mq_lang/
lib.rs

//! `mq-lang` provides a parser and evaluator for [mq](https://github.com/harehare/mq).
2//!
3//! ## Examples
4//!
//! ```rs
//! let code = "add(\"world!\")";
//! let input = mq_lang::parse_markdown_input("Hello,").unwrap().into_iter();
//! let mut engine = mq_lang::DefaultEngine::default();
//!
//! assert_eq!(
//!     engine.eval(code, input).unwrap(),
//!     vec![mq_lang::RuntimeValue::Markdown(
//!         Box::new(mq_markdown::Node::Text(mq_markdown::Text {
//!             value: "Hello,world!".to_string(),
//!             position: None,
//!         })),
//!         None,
//!     )]
//!     .into()
//! );
//!
//! // Parse code into AST nodes
//! use mq_lang::{Arena, Shared, SharedCell};
//!
//! let code = "1 + 2";
//! let token_arena = Shared::new(SharedCell::new(Arena::new(10)));
//! let ast = mq_lang::parse(code, token_arena).unwrap();
//!
//! assert_eq!(ast.nodes.len(), 1);
//!
//! // Parse code into CST nodes (requires the `cst` feature)
//! let code = "1 + 2";
//! let (cst_nodes, errors) = mq_lang::parse_recovery(code);
//!
//! assert!(!errors.has_errors());
//! assert!(!cst_nodes.is_empty());
//! ```
35//!
36//! ## Features
37//!
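//! - `ast-json`: Enables serialization and deserialization of the AST (Abstract Syntax Tree)
//!   to/from JSON format (`ast_to_json` / `ast_from_json`). Use `Engine::compile` and
//!   `Engine::eval_compiled` to execute programs constructed from deserialized ASTs.
//!   When this feature is enabled, `serde` and `serde_json` dependencies are included.
//! - `cst`: Enables the concrete syntax tree parser (`CstParser`, `IncrementalParser`)
//!   and the `parse_recovery` function.
//! - `debugger`: Exports the debugger types (`Debugger`, `Breakpoint`, `DebugContext`, ...).
//! - `sync`: Backs the `Shared` / `SharedCell` aliases with `Arc` / `RwLock` instead of
//!   `Rc` / `RefCell`.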
42mod arena;
43mod ast;
44#[cfg(feature = "cst")]
45mod cst;
46mod engine;
47mod error;
48mod eval;
49mod ident;
50mod lexer;
51mod macro_expand;
52mod module;
53mod number;
54mod optimizer;
55mod range;
56mod selector;
57
58use lexer::Lexer;
59#[cfg(not(feature = "sync"))]
60use std::cell::RefCell;
61#[cfg(not(feature = "sync"))]
62use std::rc::Rc;
63#[cfg(feature = "sync")]
64use std::sync::Arc;
65#[cfg(feature = "sync")]
66use std::sync::RwLock;
67
68pub use arena::{Arena, ArenaId};
69pub use ast::Program;
70pub use ast::node::Expr as AstExpr;
71pub use ast::node::IdentWithToken;
72pub use ast::node::Literal as AstLiteral;
73pub use ast::node::Node as AstNode;
74pub use ast::node::Params as AstParams;
75pub use ast::node::Pattern as AstPattern;
76pub use ast::parser::Parser as AstParser;
77#[cfg(feature = "ast-json")]
78pub use ast::{ast_from_json, ast_to_json};
79pub use engine::CompiledProgram;
80pub use engine::Engine;
81pub use error::Error;
82pub use eval::builtin::{
83    BUILTIN_FUNCTION_DOC, BUILTIN_SELECTOR_DOC, BuiltinFunctionDoc, BuiltinSelectorDoc, INTERNAL_FUNCTION_DOC,
84};
85pub use eval::runtime_value::{RuntimeValue, RuntimeValues};
86pub use ident::Ident;
87pub use lexer::Options as LexerOptions;
88pub use lexer::token::{StringSegment, Token, TokenKind};
89pub use module::{
90    BUILTIN_FILE as BUILTIN_MODULE_FILE, Module, ModuleId, ModuleLoader, STANDARD_MODULES, error::ModuleError,
91    resolver::DefaultModuleResolver, resolver::ModuleResolver,
92};
93pub use optimizer::OptimizationLevel;
94pub use range::{Position, Range};
95pub use selector::{AttrKind, Selector};
96
97pub type DefaultEngine = Engine<DefaultModuleResolver>;
98pub type DefaultModuleLoader = ModuleLoader<DefaultModuleResolver>;
99
100#[cfg(feature = "cst")]
101pub use cst::incremental::{IncrementalParser, TextEdit};
102#[cfg(feature = "cst")]
103pub use cst::node::BinaryOp as CstBinaryOp;
104#[cfg(feature = "cst")]
105pub use cst::node::Node as CstNode;
106#[cfg(feature = "cst")]
107pub use cst::node::NodeKind as CstNodeKind;
108#[cfg(feature = "cst")]
109pub use cst::node::Trivia as CstTrivia;
110#[cfg(feature = "cst")]
111pub use cst::node::UnaryOp as CstUnaryOp;
112#[cfg(feature = "cst")]
113pub use cst::parser::ErrorReporter as CstErrorReporter;
114#[cfg(feature = "cst")]
115pub use cst::parser::Parser as CstParser;
116
117#[cfg(feature = "debugger")]
118pub use eval::debugger::{
119    Breakpoint, DebugContext, Debugger, DebuggerAction, DebuggerCommand, DebuggerHandler, Source,
120};
121
122use crate::ast::TokenId;
123
124pub type MqResult = Result<RuntimeValues, Box<Error>>;
125
/// Type alias for a reference-counted pointer; resolves to `Rc` by default and to `Arc` when the "sync" feature is enabled.
127#[cfg(not(feature = "sync"))]
128pub type Shared<T> = Rc<T>;
129#[cfg(feature = "sync")]
130pub type Shared<T> = Arc<T>;
131
/// Type alias for interior mutability; resolves to `RefCell` by default and to `RwLock` when the "sync" feature is enabled.
133#[cfg(not(feature = "sync"))]
134pub type SharedCell<T> = RefCell<T>;
135#[cfg(feature = "sync")]
136pub type SharedCell<T> = RwLock<T>;
137
138pub(crate) type TokenArena = Shared<SharedCell<Arena<Shared<Token>>>>;
139
/// Parses `code` into concrete syntax tree nodes, returning them with the parser's error reporter.
///
/// The source is tokenized with both `ignore_errors` and `include_spaces` enabled,
/// every token is wrapped in a `Shared` pointer, and the resulting slice is handed
/// to `CstParser`.
///
/// # Panics
///
/// Panics if the lexer returns an error, because the tokenization result is unwrapped.
#[cfg(feature = "cst")]
pub fn parse_recovery(code: &str) -> (Vec<Shared<CstNode>>, CstErrorReporter) {
    let lexer_options = lexer::Options {
        ignore_errors: true,
        include_spaces: true,
    };

    // Any lexer failure is converted into the crate error type before unwrapping.
    let raw_tokens = Lexer::new(lexer_options)
        .tokenize(code, Module::TOP_LEVEL_MODULE_ID)
        .map_err(|e| Box::new(error::Error::from_error(code, e.into(), DefaultModuleLoader::default())))
        .unwrap();

    let shared_tokens = raw_tokens.into_iter().map(Shared::new).collect::<Vec<Shared<Token>>>();

    CstParser::new(&shared_tokens).parse()
}
153
154pub fn parse(code: &str, token_arena: TokenArena) -> Result<Program, Box<error::Error>> {
155    let tokens = Lexer::new(lexer::Options::default())
156        .tokenize(code, Module::TOP_LEVEL_MODULE_ID)
157        .map_err(|e| Box::new(error::Error::from_error(code, e.into(), DefaultModuleLoader::default())))?;
158    let mut token_arena = {
159        #[cfg(not(feature = "sync"))]
160        {
161            token_arena.borrow_mut()
162        }
163
164        #[cfg(feature = "sync")]
165        {
166            token_arena.write().unwrap()
167        }
168    };
169
170    AstParser::new(
171        tokens.into_iter().map(Shared::new).collect::<Vec<_>>().iter(),
172        &mut token_arena,
173        Module::TOP_LEVEL_MODULE_ID,
174    )
175    .parse()
176    .map_err(|e| Box::new(error::Error::from_error(code, e.into(), DefaultModuleLoader::default())))
177}
178
179/// Parses an MDX string and returns an iterator over `Value` nodes.
180pub fn parse_mdx_input(input: &str) -> miette::Result<Vec<RuntimeValue>> {
181    let mdx = mq_markdown::Markdown::from_mdx_str(input)?;
182    Ok(mdx.nodes.into_iter().map(RuntimeValue::from).collect())
183}
184
185pub fn parse_html_input(input: &str) -> miette::Result<Vec<RuntimeValue>> {
186    let html = mq_markdown::Markdown::from_html_str(input)?;
187    Ok(html.nodes.into_iter().map(RuntimeValue::from).collect())
188}
189
190pub fn parse_html_input_with_options(
191    input: &str,
192    options: mq_markdown::ConversionOptions,
193) -> miette::Result<Vec<RuntimeValue>> {
194    let html = mq_markdown::Markdown::from_html_str_with_options(input, options)?;
195    Ok(html.nodes.into_iter().map(RuntimeValue::from).collect())
196}
197
198/// Parses a Markdown string and returns an iterator over `Value` nodes.
199pub fn parse_markdown_input(input: &str) -> miette::Result<Vec<RuntimeValue>> {
200    let md = mq_markdown::Markdown::from_markdown_str(input)?;
201    Ok(md.nodes.into_iter().map(RuntimeValue::from).collect())
202}
203
204/// Parses a plain text string and returns an iterator over `Value` node.
205pub fn parse_text_input(input: &str) -> miette::Result<Vec<RuntimeValue>> {
206    Ok(input.lines().map(|line| line.to_string().into()).collect())
207}
208
209/// Returns a vector containing a single `Value` representing an empty input.
210pub fn null_input() -> Vec<RuntimeValue> {
211    vec!["".to_string().into()]
212}
213
214/// Parses a raw input string and returns a vector containing a single `Value` node.
215pub fn raw_input(input: &str) -> Vec<RuntimeValue> {
216    vec![input.to_string().into()]
217}
218
219/// Returns a vector containing a single `RuntimeValue::Bytes` for raw binary input.
220pub fn bytes_input(bytes: &[u8]) -> Vec<RuntimeValue> {
221    vec![RuntimeValue::Bytes(bytes.to_vec())]
222}
223
224#[inline(always)]
225pub(crate) fn token_alloc(arena: &TokenArena, token: &Shared<Token>) -> TokenId {
226    #[cfg(not(feature = "sync"))]
227    {
228        arena.borrow_mut().alloc(Shared::clone(token))
229    }
230
231    #[cfg(feature = "sync")]
232    {
233        arena.write().unwrap().alloc(Shared::clone(token))
234    }
235}
236
237#[inline(always)]
238pub(crate) fn get_token(arena: TokenArena, token_id: TokenId) -> Shared<Token> {
239    #[cfg(not(feature = "sync"))]
240    {
241        Shared::clone(&arena.borrow()[token_id])
242    }
243
244    #[cfg(feature = "sync")]
245    {
246        Shared::clone(&arena.read().unwrap()[token_id])
247    }
248}
249
#[cfg(test)]
mod tests {
    use super::*;

    // Builds an empty token arena for the AST parser tests.
    fn new_token_arena() -> TokenArena {
        Shared::new(SharedCell::new(Arena::new(10)))
    }

    // Evaluating `add` against a single text node appends the argument to its value.
    #[test]
    fn test_eval_basic() {
        let mut engine = DefaultEngine::default();
        let markdown = mq_markdown::Markdown::from_markdown_str("Hello,").unwrap();
        let input = markdown
            .nodes
            .into_iter()
            .map(RuntimeValue::from)
            .collect::<Vec<_>>();

        let actual = engine.eval("add(\"world!\")", input.into_iter()).unwrap();

        let expected_node = mq_markdown::Node::Text(mq_markdown::Text {
            value: "Hello,world!".to_string(),
            position: None,
        });
        assert_eq!(
            actual,
            vec![RuntimeValue::Markdown(Box::new(expected_node), None)].into()
        );
    }

    // An argument list that is never closed must be rejected.
    #[test]
    fn test_parse_error_syntax() {
        let outcome = parse("add(1,", new_token_arena());

        assert!(outcome.is_err());
    }

    // Input containing a stray backtick must be rejected.
    #[test]
    fn test_parse_error_lexer() {
        let outcome = parse("add(1, `unclosed string)", new_token_arena());

        assert!(outcome.is_err());
    }

    // Well-formed input yields CST nodes and no reported errors.
    #[test]
    #[cfg(feature = "cst")]
    fn test_parse_recovery_success() {
        let (nodes, reporter) = parse_recovery("add(1, 2)");

        assert!(!reporter.has_errors());
        assert!(!nodes.is_empty());
    }

    // An unterminated call reports errors and yields no nodes.
    #[test]
    #[cfg(feature = "cst")]
    fn test_parse_recovery_with_errors() {
        let (nodes, reporter) = parse_recovery("add(1,");

        assert!(reporter.has_errors());
        assert!(nodes.is_empty());
    }

    // An unterminated string literal reports errors and yields no nodes.
    #[test]
    #[cfg(feature = "cst")]
    fn test_parse_recovery_with_error_lexer() {
        let (nodes, reporter) = parse_recovery("add(1, \"");

        assert!(reporter.has_errors());
        assert!(nodes.is_empty());
    }

    // Markdown input produces at least one value.
    #[test]
    fn test_parse_markdown_input() {
        let parsed = parse_markdown_input("# Heading\n\nSome text.");
        assert!(parsed.is_ok());

        let values: Vec<RuntimeValue> = parsed.unwrap();
        assert!(!values.is_empty());
    }

    // MDX input produces at least one value.
    #[test]
    fn test_parse_mdx_input() {
        let parsed = parse_mdx_input("# Heading\n\nSome text.");
        assert!(parsed.is_ok());

        let values: Vec<RuntimeValue> = parsed.unwrap();
        assert!(!values.is_empty());
    }

    // Plain text input produces one value per line.
    #[test]
    fn test_parse_text_input() {
        let parsed = parse_text_input("line1\nline2\nline3");
        assert!(parsed.is_ok());

        let values: Vec<RuntimeValue> = parsed.unwrap();
        assert_eq!(values.len(), 3);
    }

    // HTML input produces at least one value.
    #[test]
    fn test_parse_html_input() {
        let parsed = parse_html_input("<h1>Heading</h1><p>Some text.</p>");
        assert!(parsed.is_ok());

        let values: Vec<RuntimeValue> = parsed.unwrap();
        assert!(!values.is_empty());
    }

    // With every conversion option enabled, the rendered Markdown contains front
    // matter, the title as a heading, and the script as a code block.
    #[test]
    fn test_parse_html_input_with_options() {
        let html = r#"<html>
      <head>
        <title>Title</title>
        <meta name="description" content="This is a test meta description.">
        <script>let foo = 'bar'</script>
      </head>
      <body>
        <p>Some text.</p>
      </body>
    </html>"#;
        let options = mq_markdown::ConversionOptions {
            extract_scripts_as_code_blocks: true,
            generate_front_matter: true,
            use_title_as_h1: true,
        };

        let parsed = parse_html_input_with_options(html, options);
        assert!(parsed.is_ok());

        // Markdown values contribute their node; anything else is stringified first.
        let nodes = parsed
            .unwrap()
            .iter()
            .map(|value| {
                if let RuntimeValue::Markdown(node, _) = value {
                    (**node).clone()
                } else {
                    value.to_string().into()
                }
            })
            .collect();
        let rendered = mq_markdown::Markdown::new(nodes).to_string();

        assert_eq!(
            rendered,
            "---
description: This is a test meta description.
title: Title
---

# Title

```
let foo = 'bar'
```

Some text.
"
        );
    }
}