syntect-no-panic 4.6.1

temporary fork of Syntect (don't use)
Documentation
// see DESIGN.md
use std::collections::HashMap;
use std::u16;
use std::sync::Mutex;
use std::fmt;
use std::str::FromStr;
use std::u64;
use std::cmp::{Ordering, min};
use std::mem;

use serde::{Deserialize, Deserializer, Serialize, Serializer};
use serde::de::{Error, Visitor};

/// Multiplier on the power of 2 for MatchPower. This is only useful if you compute your own
/// [`MatchPower`] scores
///
/// [`MatchPower`]: struct.MatchPower.html
pub const ATOM_LEN_BITS: u16 = 3;

lazy_static! {
    /// The global scope repo, exposed in case you want to minimize locking and unlocking.
    ///
    /// Ths shouldn't be necessary for you to use. See the [`ScopeRepository`] docs.
    ///
    /// [`ScopeRepository`]: struct.ScopeRepository.html
    pub static ref SCOPE_REPO: Mutex<ScopeRepository> = Mutex::new(ScopeRepository::new());
}

/// A hierarchy of atoms with semi-standardized names used to accord semantic information to a
/// specific piece of text.
///
/// These are generally written with the atoms separated by dots, and - by convention - atoms are
/// all lowercase alphanumeric.
///
/// Example scopes: `text.plain`, `punctuation.definition.string.begin.ruby`,
/// `meta.function.parameters.rust`
///
/// `syntect` uses an optimized format for storing these that allows super fast comparison and
/// determining if one scope is a prefix of another. It also always takes 16 bytes of space. It
/// accomplishes this by using a global repository to store string values and using bit-packed 16
/// bit numbers to represent and compare atoms. Like "atoms" or "symbols" in other languages. This
/// means that while comparing and prefix are fast, extracting a string is relatively slower but
/// ideally should be very rare.
#[derive(Clone, PartialEq, Eq, PartialOrd, Ord, Copy, Default, Hash)]
pub struct Scope {
    a: u64,
    b: u64,
}

/// Not all strings are valid scopes
#[derive(Debug)]
pub enum ParseScopeError {
    /// Due to a limitation of the current optimized internal representation
    /// scopes can be at most 8 atoms long
    TooLong,
    /// The internal representation uses 16 bits per atom, so if all scopes ever
    /// used by the program have more than 2^16-2 atoms, things break
    TooManyAtoms,
}

/// The structure used to keep track of the mapping between scope atom numbers and their string
/// names
///
/// It is only exposed in case you want to lock [`SCOPE_REPO`] and then allocate a bunch of scopes
/// at once without thrashing the lock. In general, you should just use [`Scope::new()`].
///
/// Only [`Scope`]s created by the same repository have valid comparison results.
///
/// [`SCOPE_REPO`]: struct.SCOPE_REPO.html
/// [`Scope::new()`]: struct.Scope.html#method.new
/// [`Scope`]: struct.Scope.html
#[derive(Debug)]
pub struct ScopeRepository {
    atoms: Vec<String>,
    atom_index_map: HashMap<String, usize>,
}

/// A stack/sequence of scopes for representing hierarchies for a given token of text
///
/// This is also used within [`ScopeSelectors`].
///
/// In Sublime Text, the scope stack at a given point can be seen by pressing `ctrl+shift+p`. Also
/// see [the TextMate docs](https://manual.macromates.com/en/scope_selectors).
///
/// Example for a JS string inside a script tag in a Rails `ERB` file:
/// `text.html.ruby text.html.basic source.js.embedded.html string.quoted.double.js`
///
/// [`ScopeSelectors`]: ../highlighting/struct.ScopeSelectors.html
#[derive(Debug, Clone, PartialEq, Eq, Default, Serialize, Deserialize)]
pub struct ScopeStack {
    clear_stack: Vec<Vec<Scope>>,
    pub scopes: Vec<Scope>,
}

#[derive(Debug, Clone, Copy, Eq, PartialEq, Serialize, Deserialize)]
pub enum ClearAmount {
    TopN(usize),
    All,
}

/// A change to a scope stack
///
/// Generally, `Noop` is only used internally and you won't need to worry about getting one back
/// from calling a public function.
///
/// The change from a `ScopeStackOp` can be applied via [`ScopeStack::apply`].
///
/// [`ScopeStack::apply`]: struct.ScopeStack.html#method.apply
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum ScopeStackOp {
    Push(Scope),
    Pop(usize),
    /// Used for the `clear_scopes` feature
    Clear(ClearAmount),
    /// Restores cleared scopes
    Restore,
    Noop,
}

/// Used for [`ScopeStack::apply_with_hook`]
///
/// [`ScopeStack::apply_with_hook`]: struct.ScopeStack.html#method.apply_with_hook
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum BasicScopeStackOp {
    Push(Scope),
    Pop,
}

fn pack_as_u16s(atoms: &[usize]) -> Result<Scope, ParseScopeError> {
    let mut res = Scope { a: 0, b: 0 };

    for (i, &n) in atoms.iter().enumerate() {
        if n >= (u16::MAX as usize) - 2 {
            return Err(ParseScopeError::TooManyAtoms);
        }
        let small = (n + 1) as u64; // +1 since we reserve 0 for unused

        if i < 4 {
            let shift = (3 - i) * 16;
            res.a |= small << shift;
        } else {
            let shift = (7 - i) * 16;
            res.b |= small << shift;
        }
    }
    Ok(res)
}

impl ScopeRepository {
    fn new() -> ScopeRepository {
        ScopeRepository {
            atoms: Vec::new(),
            atom_index_map: HashMap::new(),
        }
    }

    pub fn build(&mut self, s: &str) -> Result<Scope, ParseScopeError> {
        if s.is_empty() {
            return Ok(Scope { a: 0, b: 0 });
        }
        let parts: Vec<usize> = s.trim_end_matches('.').split('.').map(|a| self.atom_to_index(a)).collect();
        if parts.len() > 8 {
            return Err(ParseScopeError::TooManyAtoms);
        }
        pack_as_u16s(&parts[..])
    }

    pub fn to_string(&self, scope: Scope) -> String {
        let mut s = String::new();
        for i in 0..8 {
            let atom_number = scope.atom_at(i);
            // println!("atom {} of {:x}-{:x} = {:x}",
            //     i, scope.a, scope.b, atom_number);
            if atom_number == 0 {
                break;
            }
            if i != 0 {
                s.push('.');
            }
            s.push_str(self.atom_str(atom_number));
        }
        s
    }

    fn atom_to_index(&mut self, atom: &str) -> usize {
        if let Some(index) = self.atom_index_map.get(atom) {
            return *index;
        }

        self.atoms.push(atom.to_owned());
        let index = self.atoms.len() - 1;
        self.atom_index_map.insert(atom.to_owned(), index);

        index
    }

    /// Return the string for an atom number returned by [`Scope::atom_at`]
    ///
    /// [`Scope::atom_at`]: struct.Scope.html#method.atom_at
    pub fn atom_str(&self, atom_number: u16) -> &str {
        &self.atoms[(atom_number - 1) as usize]
    }
}

impl Scope {
    /// Parses a `Scope` from a series of atoms separated by dot (`.`) characters
    ///
    /// Example: `Scope::new("meta.rails.controller")`
    pub fn new(s: &str) -> Result<Scope, ParseScopeError> {
        let mut repo = SCOPE_REPO.lock().unwrap();
        repo.build(s.trim())
    }

    /// Gets the atom number at a given index.
    ///
    /// I can't think of any reason you'd find this useful. It is used internally for turning a
    /// scope back into a string.
    pub fn atom_at(self, index: usize) -> u16 {
        let shifted = if index < 4 {
            self.a >> ((3 - index) * 16)
        } else if index < 8 {
            self.b >> ((7 - index) * 16)
        } else {
            panic!("atom index out of bounds {:?}", index);
        };
        (shifted & 0xFFFF) as u16
    }

    #[inline]
    fn missing_atoms(self) -> u32 {
        let trail = if self.b == 0 {
            self.a.trailing_zeros() + 64
        } else {
            self.b.trailing_zeros()
        };
        trail / 16
    }

    /// Returns the number of atoms in the scope
    #[inline(always)]
    pub fn len(self) -> u32 {
        8 - self.missing_atoms()
    }

    pub fn is_empty(self) -> bool {
        self.len() == 0
    }

    /// Returns a string representation of this scope
    ///
    /// This requires locking a global repo and shouldn't be done frequently.
    pub fn build_string(self) -> String {
        let repo = SCOPE_REPO.lock().unwrap();
        repo.to_string(self)
    }

    /// Tests if this scope is a prefix of another scope. Note that the empty scope is always a
    /// prefix.
    ///
    /// This operation uses bitwise operations and is very fast
    /// # Examples
    ///
    /// ```
    /// use syntect::parsing::Scope;
    /// assert!( Scope::new("string").unwrap()
    ///         .is_prefix_of(Scope::new("string.quoted").unwrap()));
    /// assert!( Scope::new("string.quoted").unwrap()
    ///         .is_prefix_of(Scope::new("string.quoted").unwrap()));
    /// assert!( Scope::new("").unwrap()
    ///         .is_prefix_of(Scope::new("meta.rails.controller").unwrap()));
    /// assert!(!Scope::new("source.php").unwrap()
    ///         .is_prefix_of(Scope::new("source").unwrap()));
    /// assert!(!Scope::new("source.php").unwrap()
    ///         .is_prefix_of(Scope::new("source.ruby").unwrap()));
    /// assert!(!Scope::new("meta.php").unwrap()
    ///         .is_prefix_of(Scope::new("source.php").unwrap()));
    /// assert!(!Scope::new("meta.php").unwrap()
    ///         .is_prefix_of(Scope::new("source.php.wow").unwrap()));
    /// ```
    pub fn is_prefix_of(self, s: Scope) -> bool {
        let pref_missing = self.missing_atoms();

        // TODO: test optimization - use checked shl and then mult carry flag as int by -1
        let mask: (u64, u64) = if pref_missing == 8 {
            (0, 0)
        } else if pref_missing == 4 {
            (u64::MAX, 0)
        } else if pref_missing > 4 {
            (u64::MAX << ((pref_missing - 4) * 16), 0)
        } else {
            (u64::MAX, u64::MAX << (pref_missing * 16))
        };

        // xor to find the difference
        let ax = (self.a ^ s.a) & mask.0;
        let bx = (self.b ^ s.b) & mask.1;
        // println!("{:x}-{:x} is_pref {:x}-{:x}: missing {} mask {:x}-{:x} xor {:x}-{:x}",
        //     self.a, self.b, s.a, s.b, pref_missing, mask.0, mask.1, ax, bx);

        ax == 0 && bx == 0
    }
}

impl FromStr for Scope {
    type Err = ParseScopeError;

    fn from_str(s: &str) -> Result<Scope, ParseScopeError> {
        Scope::new(s)
    }
}

impl fmt::Display for Scope {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let s = self.build_string();
        write!(f, "{}", s)
    }
}

impl fmt::Debug for Scope {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let s = self.build_string();
        write!(f, "<{}>", s)
    }
}

impl Serialize for Scope {
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error> where S: Serializer {
        let s = self.build_string();
        serializer.serialize_str(&s)
    }
}

impl<'de> Deserialize<'de> for Scope {
    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error> where D: Deserializer<'de> {

        struct ScopeVisitor;

        impl<'de> Visitor<'de> for ScopeVisitor {
            type Value = Scope;

            fn expecting(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
                formatter.write_str("a string")
            }

            fn visit_str<E>(self, v: &str) -> Result<Scope, E> where E: Error {
                Scope::new(v).map_err(|e| Error::custom(format!("Invalid scope: {:?}", e)))
            }
        }

        deserializer.deserialize_str(ScopeVisitor)
    }
}

/// Wrapper to get around the fact Rust `f64` doesn't implement `Ord` and there is no non-NaN
/// float type
#[derive(Debug, Copy, Clone, PartialOrd, PartialEq)]
pub struct MatchPower(pub f64);

impl Eq for MatchPower {}

#[allow(clippy::derive_ord_xor_partial_ord)] // The code works, so let's keep using it
impl Ord for MatchPower {
    fn cmp(&self, other: &Self) -> Ordering {
        self.partial_cmp(other).unwrap()
    }
}

impl ScopeStack {
    pub fn new() -> ScopeStack {
        ScopeStack {
            clear_stack: Vec::new(),
            scopes: Vec::new()
        }
    }

    /// Note: creating a ScopeStack with this doesn't contain information
    /// on what to do when `clear_scopes` contexts end.
    pub fn from_vec(v: Vec<Scope>) -> ScopeStack {
        ScopeStack {
            clear_stack: Vec::new(),
            scopes: v
        }
    }

    #[inline]
    pub fn push(&mut self, s: Scope) {
        self.scopes.push(s);
    }

    #[inline]
    pub fn pop(&mut self) {
        self.scopes.pop();
    }

    /// Modifies this stack according to the operation given
    ///
    /// Use this to create a stack from a `Vec` of changes given by the parser.
    pub fn apply(&mut self, op: &ScopeStackOp) {
        self.apply_with_hook(op, |_,_|{})
    }

    /// Modifies this stack according to the operation given and calls the hook for each basic operation.
    ///
    /// Like [`apply`] but calls `hook` for every basic modification (as defined by
    /// [`BasicScopeStackOp`]). Use this to do things only when the scope stack changes.
    ///
    /// [`apply`]: #method.apply
    /// [`BasicScopeStackOp`]: enum.BasicScopeStackOp.html
    #[inline]
    pub fn apply_with_hook<F>(&mut self, op: &ScopeStackOp, mut hook: F)
        where F: FnMut(BasicScopeStackOp, &[Scope])
    {
        match *op {
            ScopeStackOp::Push(scope) => {
                self.scopes.push(scope);
                hook(BasicScopeStackOp::Push(scope), self.as_slice());
            }
            ScopeStackOp::Pop(count) => {
                for _ in 0..count {
                    self.scopes.pop();
                    hook(BasicScopeStackOp::Pop, self.as_slice());
                }
            }
            ScopeStackOp::Clear(amount) => {
                let cleared = match amount {
                    ClearAmount::TopN(n) => {
                        // don't try to clear more scopes than are on the stack
                        let to_leave = self.scopes.len() - min(n, self.scopes.len());
                        self.scopes.split_off(to_leave)
                    }
                    ClearAmount::All => {
                        let mut cleared = Vec::new();
                        mem::swap(&mut cleared, &mut self.scopes);
                        cleared
                    }
                };
                let clear_amount = cleared.len();
                self.clear_stack.push(cleared);
                for _ in 0..clear_amount {
                    hook(BasicScopeStackOp::Pop, self.as_slice());
                }
            }
            ScopeStackOp::Restore => {
                match self.clear_stack.pop() {
                    Some(ref mut to_push) => {
                        for s in to_push {
                            self.scopes.push(*s);
                            hook(BasicScopeStackOp::Push(*s), self.as_slice());
                        }
                    }
                    None => panic!("tried to restore cleared scopes, but none were cleared"),
                }
            }
            ScopeStackOp::Noop => (),
        }
    }

    /// Prints out each scope in the stack separated by spaces
    /// and then a newline. Top of the stack at the end.
    pub fn debug_print(&self, repo: &ScopeRepository) {
        for s in &self.scopes {
            print!("{} ", repo.to_string(*s));
        }
        println!();
    }

    /// Returns the bottom `n` elements of the stack.
    ///
    /// Equivalent to `&scopes[0..n]` on a `Vec`
    pub fn bottom_n(&self, n: usize) -> &[Scope] {
        &self.scopes[0..n]
    }

    /// Return a slice of the scopes in this stack
    #[inline]
    pub fn as_slice(&self) -> &[Scope] {
        &self.scopes[..]
    }

    /// Return the height/length of this stack
    #[inline]
    pub fn len(&self) -> usize {
        self.scopes.len()
    }

    #[inline]
    pub fn is_empty(&self) -> bool {
        self.len() == 0
    }

    /// Checks if this stack as a selector matches the given stack, returning the match score if so
    ///
    /// Higher match scores indicate stronger matches. Scores are ordered according to the rules
    /// found at [https://manual.macromates.com/en/scope_selectors](https://manual.macromates.com/en/scope_selectors)
    ///
    /// It accomplishes this ordering through some floating point math ensuring deeper and longer
    /// matches matter. Unfortunately it is only guaranteed to return perfectly accurate results up
    /// to stack depths of 17, but it should be reasonably good even afterwards. TextMate has the
    /// exact same limitation, dunno about Sublime Text.
    ///
    /// # Examples
    /// ```
    /// use syntect::parsing::{ScopeStack, MatchPower};
    /// use std::str::FromStr;
    /// assert_eq!(ScopeStack::from_str("a.b c e.f").unwrap()
    ///     .does_match(ScopeStack::from_str("a.b c.d e.f.g").unwrap().as_slice()),
    ///     Some(MatchPower(0o212u64 as f64)));
    /// assert_eq!(ScopeStack::from_str("a c.d.e").unwrap()
    ///     .does_match(ScopeStack::from_str("a.b c.d e.f.g").unwrap().as_slice()),
    ///     None);
    /// ```
    pub fn does_match(&self, stack: &[Scope]) -> Option<MatchPower> {
        let mut sel_index: usize = 0;
        let mut score: f64 = 0.0;
        for (i, scope) in stack.iter().enumerate() {
            let sel_scope = self.scopes[sel_index];
            if sel_scope.is_prefix_of(*scope) {
                let len = sel_scope.len();
                // equivalent to score |= len << (ATOM_LEN_BITS*i) on a large unsigned
                score += f64::from(len) * f64::from(ATOM_LEN_BITS * (i as u16)).exp2();
                sel_index += 1;
                if sel_index >= self.scopes.len() {
                    return Some(MatchPower(score));
                }
            }
        }
        None
    }
}

impl FromStr for ScopeStack {
    type Err = ParseScopeError;

    /// Parses a scope stack from a whitespace separated list of scopes.
    fn from_str(s: &str) -> Result<ScopeStack, ParseScopeError> {
        let mut scopes = Vec::new();
        for name in s.split_whitespace() {
            scopes.push(Scope::from_str(name)?)
        }
        Ok(ScopeStack::from_vec(scopes))
    }
}

impl fmt::Display for ScopeStack {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        for s in &self.scopes {
            write!(f, "{} ", s)?;
        }
        Ok(())
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn misc() {
        // use std::mem;
        // use std::rc::{Rc};
        // use scope::*;
        // assert_eq!(8, mem::size_of::<Rc<Scope>>());
        // assert_eq!(Scope::new("source.php"), Scope::new("source.php"));
    }

    #[test]
    fn repo_works() {
        let mut repo = ScopeRepository::new();
        assert_eq!(repo.build("source.php").unwrap(),
                   repo.build("source.php").unwrap());
        assert_eq!(repo.build("source.php.wow.hi.bob.troll.clock.5").unwrap(),
                   repo.build("source.php.wow.hi.bob.troll.clock.5").unwrap());
        assert_eq!(repo.build("").unwrap(), repo.build("").unwrap());
        let s1 = repo.build("").unwrap();
        assert_eq!(repo.to_string(s1), "");
        let s2 = repo.build("source.php.wow").unwrap();
        assert_eq!(repo.to_string(s2), "source.php.wow");
        assert!(repo.build("source.php").unwrap() != repo.build("source.perl").unwrap());
        assert!(repo.build("source.php").unwrap() != repo.build("source.php.wagon").unwrap());
        assert_eq!(repo.build("comment.line.").unwrap(),
                   repo.build("comment.line").unwrap());
    }

    #[test]
    fn global_repo_works() {
        use std::str::FromStr;
        assert_eq!(Scope::new("source.php").unwrap(),
                   Scope::new("source.php").unwrap());
        assert!(Scope::from_str("1.2.3.4.5.6.7.8").is_ok());
        assert!(Scope::from_str("1.2.3.4.5.6.7.8.9").is_err());
    }

    #[test]
    fn prefixes_work() {
        assert!(Scope::new("1.2.3.4.5.6.7.8")
            .unwrap()
            .is_prefix_of(Scope::new("1.2.3.4.5.6.7.8").unwrap()));
        assert!(Scope::new("1.2.3.4.5.6")
            .unwrap()
            .is_prefix_of(Scope::new("1.2.3.4.5.6.7.8").unwrap()));
        assert!(Scope::new("1.2.3.4")
            .unwrap()
            .is_prefix_of(Scope::new("1.2.3.4.5.6.7.8").unwrap()));
        assert!(!Scope::new("1.2.3.4.5.6.a")
            .unwrap()
            .is_prefix_of(Scope::new("1.2.3.4.5.6.7.8").unwrap()));
        assert!(!Scope::new("1.2.a.4.5.6.7")
            .unwrap()
            .is_prefix_of(Scope::new("1.2.3.4.5.6.7.8").unwrap()));
        assert!(!Scope::new("1.2.a.4.5.6.7")
            .unwrap()
            .is_prefix_of(Scope::new("1.2.3.4.5").unwrap()));
        assert!(!Scope::new("1.2.a")
            .unwrap()
            .is_prefix_of(Scope::new("1.2.3.4.5.6.7.8").unwrap()));
    }

    #[test]
    fn matching_works() {
        use std::str::FromStr;
        assert_eq!(ScopeStack::from_str("string")
                       .unwrap()
                       .does_match(ScopeStack::from_str("string.quoted").unwrap().as_slice()),
                   Some(MatchPower(0o1u64 as f64)));
        assert_eq!(ScopeStack::from_str("source")
                       .unwrap()
                       .does_match(ScopeStack::from_str("string.quoted").unwrap().as_slice()),
                   None);
        assert_eq!(ScopeStack::from_str("a.b e.f")
                       .unwrap()
                       .does_match(ScopeStack::from_str("a.b c.d e.f.g").unwrap().as_slice()),
                   Some(MatchPower(0o202u64 as f64)));
        assert_eq!(ScopeStack::from_str("c e.f")
                       .unwrap()
                       .does_match(ScopeStack::from_str("a.b c.d e.f.g").unwrap().as_slice()),
                   Some(MatchPower(0o210u64 as f64)));
        assert_eq!(ScopeStack::from_str("c.d e.f")
                       .unwrap()
                       .does_match(ScopeStack::from_str("a.b c.d e.f.g").unwrap().as_slice()),
                   Some(MatchPower(0o220u64 as f64)));
        assert_eq!(ScopeStack::from_str("a.b c e.f")
                       .unwrap()
                       .does_match(ScopeStack::from_str("a.b c.d e.f.g").unwrap().as_slice()),
                   Some(MatchPower(0o212u64 as f64)));
        assert_eq!(ScopeStack::from_str("a c.d")
                       .unwrap()
                       .does_match(ScopeStack::from_str("a.b c.d e.f.g").unwrap().as_slice()),
                   Some(MatchPower(0o021u64 as f64)));
        assert_eq!(ScopeStack::from_str("a c.d.e")
                       .unwrap()
                       .does_match(ScopeStack::from_str("a.b c.d e.f.g").unwrap().as_slice()),
                   None);
    }
}