pomsky/exprs/
boundary.rs

1//! Implements _boundaries_. The analogues in the regex world are
2//! [word boundaries](https://www.regular-expressions.info/wordboundaries.html) and
3//! [anchors](https://www.regular-expressions.info/anchors.html).
4
5use pomsky_syntax::exprs::{Boundary, BoundaryKind};
6
7use crate::{
8    compile::{CompileResult, CompileState},
9    diagnose::CompileErrorKind,
10    options::{CompileOptions, RegexFlavor},
11    regex::Regex,
12};
13
14use super::RuleExt;
15
16impl<'i> RuleExt<'i> for Boundary {
17    fn compile<'c>(
18        &'c self,
19        options: CompileOptions,
20        state: &mut CompileState<'c, 'i>,
21    ) -> CompileResult<'i> {
22        use BoundaryKind::*;
23
24        if options.flavor == RegexFlavor::JavaScript
25            && !state.ascii_only
26            && matches!(self.kind, Word | NotWord | WordStart | WordEnd)
27        {
28            Err(CompileErrorKind::JsWordBoundaryInUnicodeMode.at(self.span))
29        } else if options.flavor == RegexFlavor::Ruby && state.in_lookbehind {
30            Err(CompileErrorKind::RubyLookaheadInLookbehind { was_word_boundary: true }
31                .at(self.span))
32        } else {
33            Ok(Regex::Boundary(self.kind))
34        }
35    }
36}
37
38pub(crate) fn boundary_kind_codegen(bk: BoundaryKind, buf: &mut String, flavor: RegexFlavor) {
39    match bk {
40        BoundaryKind::Start => buf.push('^'),
41        BoundaryKind::End => buf.push('$'),
42
43        BoundaryKind::Word => buf.push_str(r"\b"),
44        BoundaryKind::NotWord => buf.push_str(r"\B"),
45
46        BoundaryKind::WordStart => buf.push_str(match flavor {
47            RegexFlavor::Pcre => "[[:<:]]",
48            RegexFlavor::Rust => r"\<",
49            _ => r"(?<!\w)(?=\w)",
50        }),
51        BoundaryKind::WordEnd => buf.push_str(match flavor {
52            RegexFlavor::Pcre => "[[:>:]]",
53            RegexFlavor::Rust => r"\>",
54            _ => r"(?<=\w)(?!\w)",
55        }),
56    }
57}