// trivet 3.1.0
//
// The trivet Parser Library
// Documentation
// Trivet
// Copyright (c) 2025 by Stacy Prowell.  All rights reserved.
// https://gitlab.com/binary-tools/trivet

//! Parse comments in various forms.

use crate::ParserCore;
use std::boxed::Box;

/// Parse comments in various common forms.  Several common comment forms are understood.
///
/// |Comment Style  |Flag                |Default |
/// |---------------|--------------------|--------|
/// |`/* ... */`    |`enable_c`          |true    |
/// |`// ...`       |`enable_cpp`        |true    |
/// |`<# ... #>`    |`enable_powershell` |false   |
/// |`# ...`        |`enable_python`     |false   |
/// |`<!-- ... -->` |`enable_xml`        |false   |
/// |user-defined   |`enable_custom`     |false   |
///
/// To use, create a new instance with [`CommentParser::new`] and then enable or disable
/// flags, depending on what comment forms you want to process.  Finally, use the
/// [`CommentParser::process`] method to consume all comments and whitespace.
///
/// # Custom comment forms
///
/// In addition to these comment forms, you can add your own.  Define a closure (or
/// function) that does the following.
///
/// - Accepts a mutable reference to a [`ParserCore`] instance
/// - Consumes any comments you wish
/// - Returns `true` if any comments are consumed, and `false` otherwise
///
/// The return value is important; it is used to determine when all comments have been
/// consumed for [`CommentParser::process`].  The closure is called again and again for
/// as long as it returns `true`, so a closure that returns `true` without consuming
/// anything will keep [`CommentParser::process`] from ever returning.
///
/// Set the `custom` field to a `Box` instance containing your closure, and then set
/// `enable_custom` to true.
///
/// # Examples
///
/// The following is an example that parses Lua comments, which are a bit notorious
/// for being persnickety.  `--[[` starts a multi-line comment, but `---[[` starts
/// a single-line comment (because the `--` begins a single line comment unless
/// immediately followed by `[[`).
///
/// ```rust
/// use trivet::parse_from_string;
/// use trivet::Parser;
/// use trivet::ParserCore;
/// use trivet::parsers::comments::CommentParser;
///
/// let mut parser = parse_from_string("--[[ I am a long\nLua comment. --]]");
/// let mut compar = CommentParser::new();
/// compar.enable_c = false;
/// compar.enable_cpp = false;
/// compar.custom = Box::new(
///     |parser: &mut ParserCore| -> bool {
///         if parser.peek_and_consume_chars(&['-','-','[','[']) {
///             parser.take_until("--]]");
///             true
///         } else if parser.peek_and_consume_chars(&['-','-']) {
///             parser.take_while(|ch| ch != '\n');
///             true
///         } else {
///             false
///         }
///     }
/// );
/// compar.enable_custom = true;
/// compar.process(&mut parser.borrow_core());
/// #[cfg(not(feature = "no_tracking"))]
/// assert_eq!(parser.loc().to_string(), "<string>:2:18");
/// assert!(parser.is_at_eof());
/// ```
///
/// The `#[cfg]` is required so this documentation test will pass with `no_tracking` enabled.
pub struct CommentParser {
    /// Enable processing of C-style comments of the form `/*...*/`.
    pub enable_c: bool,
    /// Enable processing of C++-style comments of the form `//...`.
    pub enable_cpp: bool,
    /// Enable processing PowerShell-style comments of the form `<#...#>`.
    pub enable_powershell: bool,
    /// Enable processing Python-style comments of the form `#...`.
    pub enable_python: bool,
    /// Enable processing XML-style comments of the form `<!--...-->`.  This does not
    /// enable the `<![IGNORE[...]]>` "mega" form.
    pub enable_xml: bool,
    /// Enable processing a custom comment form.  You must also provide a closure in
    /// the `custom` field; the closure is only called while this flag is true.
    pub enable_custom: bool,
    /// A custom comment parser can be defined here.  You must enable custom comment processing
    /// by setting `enable_custom`.  The closure must return `true` only when it actually
    /// consumed a comment.
    ///
    /// Please note that this takes a [`ParserCore`] instance.  This is essential so we can keep
    /// a dependency cycle from forming.
    pub custom: Box<dyn Fn(&mut ParserCore) -> bool>,
}

impl Default for CommentParser {
    fn default() -> Self {
        Self::new()
    }
}

impl CommentParser {
    /// Make a new comment parser.  By default this will process C and C++-style comments.
    /// You can change that at any time using the flags.
    pub fn new() -> Self {
        CommentParser {
            enable_c: true,
            enable_cpp: true,
            enable_powershell: false,
            enable_python: false,
            enable_xml: false,
            enable_custom: false,
            custom: Box::new(|_| false),
        }
    }

    /// Consume all comments in the stream.  This also consumes any whitespace before,
    /// between, and trailing comments.  True is returned if anything is consumed.
    /// On return the parser will be at end of file or pointing to the first non-comment,
    /// non-whitespace character in the stream.
    pub fn process(&self, parser: &mut ParserCore) -> bool {
        let mut found = parser.consume_ws_only();
        let mut hit = true;

        while hit {
            hit = false;

            // Check for each of the known forms of comments.
            if self.enable_c {
                while parser.peek_and_consume_chars(&['/', '*']) {
                    // Consume everything until the terminating token is found.
                    parser.consume_until("*/");
                    parser.consume_ws_only();
                    hit = true;
                    found = true;
                }
            }
            if self.enable_cpp {
                while parser.peek_and_consume_chars(&['/', '/']) {
                    // Consume everything until the terminating token is found.
                    parser.consume_while(|c| c != '\n');
                    parser.consume_ws_only();
                    hit = true;
                    found = true;
                }
            }
            if self.enable_powershell {
                while parser.peek_and_consume_chars(&['<', '#']) {
                    // Consume everything until the terminating token is found.
                    parser.consume_until("#>");
                    parser.consume_ws_only();
                    hit = true;
                    found = true;
                }
            }
            if self.enable_python {
                while parser.peek_and_consume('#') {
                    // Consume everything until the terminating token is found.
                    parser.consume_while(|c| c != '\n');
                    parser.consume_ws_only();
                    hit = true;
                    found = true;
                }
            }
            if self.enable_xml {
                while parser.peek_and_consume_chars(&['<', '!', '-', '-']) {
                    // Consume everything until the terminating token is found.
                    parser.consume_until("-->");
                    parser.consume_ws_only();
                    hit = true;
                    found = true;
                }
            }
            if self.enable_custom {
                while (self.custom)(parser) {
                    parser.consume_ws_only();
                    hit = true;
                    found = true;
                }
            }
        }

        found
    }
}