regex 0.1.62

An implementation of regular expressions for Rust. This implementation uses finite automata and guarantees linear time matching on all inputs.
Documentation
// Copyright 2014-2015 The Rust Project Developers. See the COPYRIGHT
// file at the top-level directory of this distribution and at
// http://rust-lang.org/COPYRIGHT.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.

// This module defines the internal parameter interface shared by the matching
// engines. It does not contain any searching code itself.

/// Params is a set of parameters that influence the behavior of matching
/// engines. In general, a given set of search parameters influences all the
/// matching engines in the same way.
///
/// The same value also carries the results of a search: capture locations
/// are written into `captures` (see `set_capture`) and per-regex match flags
/// are written into `matches` (see `set_match`). Both are borrowed from the
/// caller, which is why the struct has two lifetime parameters.
#[derive(Debug)]
pub struct Params<'c, 'm> {
    /// Storage for capture slots. Two slots (start and end) are used per
    /// capture group; the length of this slice influences which match
    /// semantics `style` reports.
    captures: &'c mut [Slot],
    /// Which regular expression matched. For a `Regex`, this always has
    /// length 1, but a `RegexSet` has length equal to the number of regexes
    /// in the set.
    matches: &'m mut [bool],
    /// Whether any regular expression has been reported as matching through
    /// `set_match`. Note that `set_match` sets this even when its index is
    /// outside of `matches`.
    matched_any: bool,
    /// When true, always use "match-short" semantics, regardless of how much
    /// capture or match storage was provided.
    match_short: bool,
}

/// Slot is a single saved capture location. Note that there are two slots for
/// every capture in a regular expression (one slot each for the start and end
/// of the capture).
///
/// `None` is the value slots hold when freshly allocated by
/// `Params::alloc_captures` and after `Params::reset`.
pub type Slot = Option<usize>;

/// The matching semantics implied by a given set of search parameters.
///
/// The style for a particular set of parameters is computed by
/// `Params::style`; engines can query `match_only` and
/// `quit_after_first_match` instead of comparing variants directly.
///
/// In general, each semantics is more expensive than the previous one.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum MatchStyle {
    /// "match-short" semantics means that the matching engine will do only
    /// as much work as needed to discover whether an expression matches
    /// the given text or not. Generally, match-short semantics means that
    /// capture locations are not tracked. However, if memory is given to track
    /// captures, then the ending location of the *shortest match* will be
    /// returned. The starting location may not be returned.
    Short,
    /// "all-match" semantics means that more than one regular expression is
    /// being executed and that the matching engine should answer whether each
    /// regular expression matches or not. (This is only applicable to regex
    /// sets.)
    All,
    /// "find" semantics means that the matching engine will find the proper
    /// leftmost-first match and return the start and end location of that
    /// match. Other capture groups are not tracked.
    Find,
    /// "capture" semantics means that the locations of all captures in the
    /// regular expression are tracked.
    Capture,
}

impl MatchStyle {
    /// Returns true for the styles that only answer *whether* something
    /// matched (`Short` and `All`), and false for the styles that must also
    /// report match locations (`Find` and `Capture`).
    pub fn match_only(&self) -> bool {
        let style = *self;
        style == MatchStyle::Short || style == MatchStyle::All
    }

    /// Returns true only for `Short`, the one style under which an engine
    /// may stop as soon as any match has been seen.
    pub fn quit_after_first_match(&self) -> bool {
        match *self {
            MatchStyle::Short => true,
            MatchStyle::All | MatchStyle::Find | MatchStyle::Capture => false,
        }
    }
}

impl<'c, 'm> Params<'c, 'm> {
    /// Creates a new set of search parameters backed by the given storage.
    ///
    /// `caps` receives capture locations (two slots per capture group) and
    /// `mats` receives one match flag per regular expression. Forced
    /// match-short semantics is disabled; the effective semantics is derived
    /// from the sizes of `caps` and `mats` (see `style`). In particular,
    /// empty capture storage with at most one match flag results in
    /// match-short semantics.
    ///
    /// The contents of `caps` and `mats` are not modified here; call `reset`
    /// to clear storage that was used by a previous search.
    pub fn new(caps: &'c mut [Slot], mats: &'m mut [bool]) -> Params<'c, 'm> {
        Params {
            captures: caps,
            matches: mats,
            matched_any: false,
            match_short: false,
        }
    }

    /// Resets the parameters as if no search has happened.
    ///
    /// Every capture slot becomes `None`, every match flag becomes `false`
    /// and `is_match` reports `false` again.
    ///
    /// This is useful for reusing the same set of parameters for multiple
    /// independent searches.
    ///
    /// This method never changes the match semantics implied by the
    /// parameters.
    pub fn reset(&mut self) {
        for slot in self.captures.iter_mut() {
            *slot = None;
        }
        for m in self.matches.iter_mut() {
            *m = false;
        }
        // The summary flag must agree with the (now all-false) match flags,
        // otherwise `is_match` would keep reporting a stale match from the
        // previous search.
        self.matched_any = false;
    }

    /// Force match-short semantics.
    ///
    /// When this is enabled (it's disabled by default), match-short semantics
    /// will always be used.
    ///
    /// Note that if backing storage for captures is provided then some of them
    /// may be filled in after a search (but may not represent leftmost-first
    /// match locations).
    ///
    /// This consumes and returns the parameters so that it can be chained
    /// directly onto `new`.
    pub fn set_match_short(mut self, yes: bool) -> Params<'c, 'm> {
        self.match_short = yes;
        self
    }

    /// Allocates cleared capture storage for `n` capture groups.
    ///
    /// The returned vector has `2 * n` slots (a start and an end slot per
    /// group), all set to `None`.
    pub fn alloc_captures(n: usize) -> Vec<Slot> {
        vec![None; 2 * n]
    }

    /// Allocates cleared match storage for `n` regular expressions.
    ///
    /// The returned vector has `n` flags, all set to `false`.
    pub fn alloc_matches(n: usize) -> Vec<bool> {
        vec![false; n]
    }

    /// Returns all capture slots. (There are 2 slots for every capture group.)
    pub fn captures(&self) -> &[Slot] {
        &*self.captures
    }

    /// Returns mutable capture slots. (There are 2 slots for every capture
    /// group.)
    pub fn captures_mut(&mut self) -> &mut [Slot] {
        &mut *self.captures
    }

    /// Returns true if one or more regular expressions matched.
    pub fn is_match(&self) -> bool {
        self.matched_any
    }

    /// Returns all matches, one flag per regular expression.
    pub fn matches(&self) -> &[bool] {
        &*self.matches
    }

    /// Sets the capture slot at index `i` to the slot given.
    ///
    /// If `i` does not point to a valid capture slot, then this is a no-op.
    #[doc(hidden)]
    pub fn set_capture(&mut self, i: usize, slot: Slot) {
        if let Some(old_slot) = self.captures.get_mut(i) {
            *old_slot = slot;
        }
    }

    /// Sets the start location of the match (capture slot 0).
    ///
    /// This is a no-op when no capture storage was provided.
    #[doc(hidden)]
    pub fn set_start(&mut self, slot: Slot) {
        self.set_capture(0, slot)
    }

    /// Sets the end location of the match (capture slot 1).
    ///
    /// This is a no-op when fewer than two capture slots were provided.
    #[doc(hidden)]
    pub fn set_end(&mut self, slot: Slot) {
        self.set_capture(1, slot)
    }

    /// Copies the given capture slots into self's capture slots.
    ///
    /// Slots are copied pairwise from the front; if the two slices differ in
    /// length, only as many slots as the shorter one holds are copied and
    /// the remainder of self's slots is left untouched.
    #[doc(hidden)]
    pub fn copy_captures_from(&mut self, caps: &[Slot]) {
        for (slot, val) in self.captures.iter_mut().zip(caps.iter()) {
            *slot = *val;
        }
    }

    /// Indicates that the regular expression at index `i` matches.
    ///
    /// `is_match` reports true afterwards even if `i` is outside of the
    /// match storage (for example when no match storage was provided); in
    /// that case no individual flag is recorded.
    #[doc(hidden)]
    pub fn set_match(&mut self, i: usize) {
        self.matched_any = true;
        if let Some(old) = self.matches.get_mut(i) {
            *old = true;
        }
    }

    /// Returns the style of matching implied by these parameters.
    ///
    /// The decision is made as follows:
    ///
    /// * forced match-short, or no capture storage with at most one match
    ///   flag: `MatchStyle::Short`
    /// * no capture storage with more than one match flag: `MatchStyle::All`
    /// * one or two capture slots: `MatchStyle::Find`
    /// * more than two capture slots: `MatchStyle::Capture`
    #[doc(hidden)]
    pub fn style(&self) -> MatchStyle {
        if self.match_short {
            MatchStyle::Short
        } else if self.captures.is_empty() {
            if self.matches.len() > 1 {
                MatchStyle::All
            } else {
                MatchStyle::Short
            }
        } else if self.captures.len() > 2 {
            MatchStyle::Capture
        } else {
            MatchStyle::Find
        }
    }
}