1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
//! # Scope Graphs Regular Expressions
//!
//! To query a scope graph,
//! you have to specify what paths are valid for the query to take.
//! You do so using a regular expression over the labels on the edges of the scope graph.
//!
//! See <https://docs.rs/scopegraphs> for all documentation
#![warn(missing_docs)]
#![cfg_attr(not(docsrs), allow(rustdoc::broken_intra_doc_links))]

use proc_macro2::{LexError, TokenStream};
use thiserror::Error;

mod compile;
#[cfg(feature = "dynamic")]
mod dynamic;
mod parse;
mod regex;

mod emit;

#[cfg(feature = "dot")]
mod dot;

pub use compile::Automaton;
#[cfg(feature = "dynamic")]
pub use dynamic::DynamicMatcher;
pub use regex::Regex;

/// A type that can match a regex. Can be created at compile time
/// through the [`compile_regex`](scopegraphs::compile_regex) macro,
/// or at runtime with the `dynamic` feature through [`Automaton::matcher`].
pub trait RegexMatcher<A>: Clone {
    /// Takes a transition in the state machine, accepting a single symbol.
    ///
    /// If the symbol was not accepted, because the regex doesn't allow it, the new state is empty,
    /// which can be checked using [`RegexMatcher::is_empty`].
    fn step(&mut self, inp: A);

    /// Steps once for each element in the iterator
    fn step_many(&mut self, inp: impl IntoIterator<Item = A>) {
        for i in inp {
            self.step(i);
        }
    }

    /// Steps once for each element in the iterator
    fn step_unless_empty(&mut self, inp: impl IntoIterator<Item = A>) {
        for i in inp {
            self.step(i);
            if self.is_empty() {
                return;
            }
        }
    }

    /// Returns true if the regular expression accepts the input iterator
    fn accepts(&self, iter: impl IntoIterator<Item = A>) -> bool {
        let mut this = self.clone();
        this.step_unless_empty(iter);
        this.is_accepting()
    }

    /// Returns true if the regular expression accepts a word of which the input iterator is a prefix
    fn accepts_prefix(&self, iter: impl IntoIterator<Item = A>) -> bool {
        let mut this = self.clone();
        this.step_unless_empty(iter);
        !this.is_empty()
    }

    /// Returns whether this state is a final state.
    ///
    /// A Final state is a state without outgoing edges
    fn is_final(&self) -> bool;

    /// Returns whether the current state would accept the regular expression.
    fn is_accepting(&self) -> bool;

    /// Returns whether the regular expression is stuck:
    /// there are no more outgoing edges and it's not currently accepting.
    fn is_empty(&self) -> bool {
        self.is_final() && !self.is_accepting()
    }
}

/// An error that can ocur while parsing a regular expression.
#[derive(Error, Debug)]
pub enum ParseError {
    /// You used tokens that aren't valid Rust tokens
    ///
    /// (scope graph regular expressions need to both be valid Rust tokens,
    /// as well as having a valid regular expression syntax)
    #[error("lex error: {0}")]
    Lex(#[from] LexError),

    /// Incorrect syntax for regular expressions.
    ///
    /// For correct syntax, see [`parse_regex`]
    #[error("parse error: {0}")]
    Parse(#[from] syn::Error),
}

/// parse a string to a regular expression
///
/// the syntax of a regular expression is:
/// ```bnf
/// E -> 0
/// E -> e
/// E -> <ident>
/// E -> ~ E
/// E -> E *
/// E -> E +
/// E -> E ?
/// E -> E E
/// E -> E | E
/// E -> E & E
/// ```
///
/// The `<ident>` needs to be a valid rust identifier
pub fn parse_regex(input: impl AsRef<str>) -> Result<Regex, ParseError> {
    let stream: TokenStream = input.as_ref().parse()?;
    Ok(parse_regex_token_stream(stream)?)
}

/// parse a rust [`TokenStream`](TokenStream) to a regular expression
///
/// // TODO: figure out to add a link to scopegraphs-macros
/// Implementation detail for [`scopegraphs-macros`]()
#[doc(hidden)]
pub fn parse_regex_token_stream(input: TokenStream) -> syn::Result<Regex> {
    syn::parse2(input)
}