depfile 0.1.1

Simply parse .d files
Documentation
//! Simply parse `.d` files
//! ```rust
//! use std::borrow::Cow;
//!
//! let input = r"
//! x.cpp.o: x.cpp \
//!   include/foo.h \
//!   include/bar.h
//! include/foo.h:
//! include/bar.h: f\ iz.h
//! ";
//!
//! let targets = depfile::parse(input).unwrap();
//!
//! // this is a bit ugly because of Cow<'_, str>
//! // the Depfile struct only clones if unescaping is needed
//! assert_eq!(
//!     targets.find("x.cpp.o").unwrap()
//!         .iter()
//!         .map(|x| x.as_ref())
//!         .collect::<Vec<_>>(),
//!     vec!["x.cpp", "include/foo.h", "include/bar.h"]
//! );
//!
//! // iterator gives (&str, &[Cow<str>])
//! let mut iter = targets.iter();

//! let (target, deps) = iter.next().unwrap();
//! assert_eq!(target, "x.cpp.o");
//! assert_eq!(deps, &[
//!     Cow::Borrowed("x.cpp"),
//!     Cow::Borrowed("include/foo.h"),
//!     Cow::Borrowed("include/bar.h")
//! ]);

//! let (target, deps) = iter.next().unwrap();
//! assert_eq!(target, "include/foo.h");
//! assert!(deps.is_empty());

//! let (target, deps) = iter.next().unwrap();
//! assert_eq!(target, "include/bar.h");
//! // escaped strings are cloned when unescaping
//! assert_eq!(deps, &[Cow::<'static, str>::Owned("f iz.h".to_string())]);
//!
//! assert_eq!(iter.next(), None);
//!
//! // recursive (DFS)
//! let targets = depfile::parse(input).unwrap();
//! let mut all_deps = targets.recurse_deps("x.cpp.o");
//! assert_eq!(all_deps.next(), Some("x.cpp"));
//! assert_eq!(all_deps.next(), Some("include/foo.h"));
//! assert_eq!(all_deps.next(), Some("include/bar.h"));
//! assert_eq!(all_deps.next(), Some("f iz.h"));
//! assert_eq!(all_deps.next(), None);
//! ```
use std::{borrow::Cow, collections::BTreeSet};

mod parse_impl;

/// Merge 2 [`Depfile`]s. The first must contain data that lives longer.
///
/// Also consider concatenating the string first before parsing
/// if the lifetime doesn't work for you.
pub fn merge<'a, 'b>(mut a: Depfile<'a>, b: Depfile<'b>) -> Depfile<'a>
where
    'b: 'a,
{
    a.rules.extend(b.rules);
    a
}

/// Parse the input as a [`Depfile`].
///
/// The output will have the targets and deps in the same order as the input.
///
/// On error, this returns the byte pos of the error.
///
/// Duplicate targets are not checked, the result will be arbitrary.
pub fn parse(input: &str) -> Result<Depfile<'_>, usize> {
    input.try_into()
}

/// Structure of a `.d` depfile. Constructed with [`parse`]
///
/// The Debug (`{:?}`) format prints unescaped symbols.
/// To reconstruct the depfile as string, consider
/// using an iterator and [`escape`]
#[derive(Clone, PartialEq, Eq, Default)]
pub struct Depfile<'a> {
    pub(crate) rules: Vec<(Cow<'a, str>, Vec<Cow<'a, str>>)>,
}
impl std::fmt::Debug for Depfile<'_> {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let mut map = f.debug_map();
        for (target, deps) in &self.rules {
            let entry = map.key(&format!("'{target}'"));
            let value = deps.iter().map(|x| format!("'{x}'")).collect::<Vec<_>>();
            entry.value(&value);
        }
        map.finish()
    }
}

impl Depfile<'_> {
    /// Get number of targets in the depfile
    pub fn len(&self) -> usize {
        self.rules.len()
    }

    /// Return `true` if the depfile contains no target
    pub fn is_empty(&self) -> bool {
        self.rules.is_empty()
    }

    /// Sort the targets to enable binary search
    pub fn sort(&mut self) {
        self.rules.sort_unstable_by(|a, b| a.0.cmp(&b.0))
    }

    /// Binary search for a target and return its dependencies.
    /// `self` must be sorted.
    ///
    /// The target symbol is compared literally, i.e. the same paths with different separators
    /// are considered different.
    pub fn binary_search(&self, target: &str) -> Option<&[Cow<'_, str>]> {
        let i = self
            .rules
            .binary_search_by(|(t, _)| t.as_ref().cmp(target))
            .ok()?;
        Some(self.rules[i].1.as_slice())
    }

    /// Find the target and return its dependencies.
    ///
    /// The target symbol is compared literally, i.e. the same paths with different separators
    /// are considered different.
    pub fn find(&self, target: &str) -> Option<&[Cow<'_, str>]> {
        let (_, deps) = self.rules.iter().find(|(t, _)| t == target)?;
        Some(deps.as_slice())
    }

    /// Iterate over (target, deps) pairs.
    ///
    /// The deps are not recursive, use [`recurse_deps`](Self::recurse_deps) for recursive traversal
    /// of dependencies using DFS
    pub fn iter(&self) -> DepfileIter<'_> {
        DepfileIter(self.rules.iter())
    }

    /// Recursively iterator over all dependencies of the target.
    ///
    /// Each dependency will only be returned once
    pub fn recurse_deps(&self, target: &str) -> DepfileRecurDeps<'_> {
        match self.find(target) {
            None => DepfileRecurDeps {
                seen: Default::default(),
                rules: self,
                stack: Default::default(),
                next_target: None,
            },
            Some(deps) => DepfileRecurDeps {
                seen: Default::default(),
                rules: self,
                stack: vec![deps.iter()],
                next_target: None,
            },
        }
    }
}

/// Iterator returned by [`Depfile::iter`]
pub struct DepfileIter<'a>(std::slice::Iter<'a, (Cow<'a, str>, Vec<Cow<'a, str>>)>);
impl<'a> Iterator for DepfileIter<'a> {
    type Item = (&'a str, &'a [Cow<'a, str>]);

    fn next(&mut self) -> Option<Self::Item> {
        let (target, deps) = self.0.next()?;
        Some((target.as_ref(), deps.as_slice()))
    }
}

/// Iterator returned by [`Depfile::recurse_deps`]
pub struct DepfileRecurDeps<'a> {
    seen: BTreeSet<&'a str>,
    rules: &'a Depfile<'a>,
    stack: Vec<std::slice::Iter<'a, Cow<'a, str>>>,
    next_target: Option<&'a str>,
}
impl<'a> Iterator for DepfileRecurDeps<'a> {
    type Item = &'a str;

    fn next(&mut self) -> Option<Self::Item> {
        if let Some(target) = self.next_target.take() {
            if let Some(deps) = self.rules.find(target) {
                if !deps.is_empty() {
                    self.stack.push(deps.iter());
                }
            }
        }
        while let Some(iter) = self.stack.last_mut() {
            for target in iter.by_ref() {
                let target_str = target.as_ref();
                if self.seen.insert(target_str) {
                    self.next_target = Some(target_str);
                    return self.next_target;
                }
            }
            self.stack.pop();
        }
        None
    }
}

/// Escape a target or dependency in the depfile. Newlines are not supported.
pub fn escape(input: &str) -> Cow<'_, str> {
    let mut s = String::new();
    macro_rules! handle_escape {
        ($i:ident, $seq:literal) => {{
            if s.is_empty() {
                // TODO: optimize with unsafe if codegen is better
                s.push_str(&input[..$i]);
            }
            s.push_str($seq);
        }};
    }
    for (i, c) in input.char_indices() {
        match c {
            ' ' => handle_escape!(i, "\\ "),
            '\\' => handle_escape!(i, "\\\\"),
            ':' => handle_escape!(i, "\\:"),
            _ => {
                if !s.is_empty() {
                    s.push(c)
                }
            }
        }
    }
    if s.is_empty() {
        return input.into();
    }
    s.into()
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_escape() {
        assert_eq!(escape(""), Cow::Borrowed(""));
        assert_eq!(escape("abc"), Cow::Borrowed("abc"));
        assert_eq!(escape(" "), Cow::<'static, str>::Owned("\\ ".to_string()));
        assert_eq!(escape(":"), Cow::<'static, str>::Owned("\\:".to_string()));
        assert_eq!(escape("/"), Cow::Borrowed("/"));
        assert_eq!(escape("\\"), Cow::<'static, str>::Owned("\\\\".to_string()));
        assert_eq!(
            escape("a bc"),
            Cow::<'static, str>::Owned("a\\ bc".to_string())
        );
        assert_eq!(
            escape("a:bc"),
            Cow::<'static, str>::Owned("a\\:bc".to_string())
        );
        assert_eq!(escape("a/bc"), Cow::Borrowed("a/bc"));
        assert_eq!(
            escape("a\\bc"),
            Cow::<'static, str>::Owned("a\\\\bc".to_string())
        );
        assert_eq!(escape("x "), Cow::<'static, str>::Owned("x\\ ".to_string()));
        assert_eq!(escape("x:"), Cow::<'static, str>::Owned("x\\:".to_string()));
        assert_eq!(escape("x/"), Cow::Borrowed("x/"));
        assert_eq!(
            escape("x\\"),
            Cow::<'static, str>::Owned("x\\\\".to_string())
        );
        assert_eq!(
            escape("x  "),
            Cow::<'static, str>::Owned("x\\ \\ ".to_string())
        );
        assert_eq!(
            escape("x :"),
            Cow::<'static, str>::Owned("x\\ \\:".to_string())
        );
        assert_eq!(
            escape("x /"),
            Cow::<'static, str>::Owned("x\\ /".to_string())
        );
        assert_eq!(
            escape("x \\"),
            Cow::<'static, str>::Owned("x\\ \\\\".to_string())
        );
        assert_eq!(
            escape("x::"),
            Cow::<'static, str>::Owned("x\\:\\:".to_string())
        );
        assert_eq!(
            escape("x:/"),
            Cow::<'static, str>::Owned("x\\:/".to_string())
        );
        assert_eq!(
            escape("x:\\"),
            Cow::<'static, str>::Owned("x\\:\\\\".to_string())
        );
        assert_eq!(escape("x//"), Cow::Borrowed("x//"));
        assert_eq!(
            escape("x/\\"),
            Cow::<'static, str>::Owned("x/\\\\".to_string())
        );
    }

    #[test]
    fn test_sort_search() {
        let mut df = crate::parse("x:1\nb:2\na:3\nc:4\nz:5\n").unwrap();
        df.sort();
        assert_eq!(df.binary_search("x").unwrap(), [Cow::from("1")].as_ref());
        assert_eq!(df.binary_search("b").unwrap(), [Cow::from("2")].as_ref());
        assert_eq!(df.binary_search("a").unwrap(), [Cow::from("3")].as_ref());
        assert_eq!(df.binary_search("c").unwrap(), [Cow::from("4")].as_ref());
        assert_eq!(df.binary_search("z").unwrap(), [Cow::from("5")].as_ref());
    }

    #[test]
    fn test_recursive() {
        let df = crate::parse("x:a b\nb:c\na:c\nc:d\nd:a\n").unwrap();
        let deps = df.recurse_deps("x").collect::<Vec<_>>();
        assert_eq!(deps, vec!["a", "c", "d", "b"])
    }
}