haz-query 0.1.0

Query evaluator over haz task DAGs.
Documentation
//! Lifter and intersection check for path-pattern atoms.
//!
//! Per `QRY-003`, the `--inputs` and `--outputs` filters accept
//! atoms drawn from the path-pattern grammar of
//! `PATH-008..PATH-016`. The lifter
//! [`parse_path_pattern_atom`] converts a [`RawAtom`] into a
//! validated [`haz_domain::path::PathPattern`]; the
//! [`intersects`] function decides whether two path patterns
//! share at least one matched path, with the glob-vs-glob arm
//! handled soundly via [`super::glob_intersect`].
//!
//! Both patterns passed to [`intersects`] MUST resolve in the
//! same coordinate system. In practice this means canonicalised
//! workspace-absolute form; the caller (the query engine) is
//! responsible for the canonicalisation step before invoking
//! the intersection check.

use haz_domain::path::PathPattern;
use haz_query_lang::expr::RawAtom;

use crate::expr::atom::AtomError;
use crate::expr::glob_intersect::{GlobIntersectError, glob_intersect_non_empty};

/// Lift a [`RawAtom`] into a validated [`PathPattern`] (`QRY-003`).
///
/// The signature is shaped so that the function can be handed
/// directly to [`haz_query_lang::expr::Expr::try_map`], turning a
/// whole parsed expression into an `Expr<PathPattern>` in one pass.
///
/// # Errors
///
/// Yields [`AtomError::InvalidPathPattern`], carrying the atom's
/// span together with the underlying parse error, whenever
/// [`PathPattern::parse`] rejects the atom text (i.e. the text
/// violates `PATH-001..PATH-016`).
pub fn parse_path_pattern_atom(atom: RawAtom) -> Result<PathPattern, AtomError> {
    match PathPattern::parse(&atom.text) {
        Ok(pattern) => Ok(pattern),
        // Attach the atom's source span so diagnostics can point at
        // the offending token.
        Err(source) => Err(AtomError::InvalidPathPattern {
            span: atom.span,
            source,
        }),
    }
}

/// Decide whether `lhs` and `rhs` match at least one common path.
///
/// The answer is meant to be exact (neither false positives nor
/// false negatives) for each of the three pattern-pair shapes:
///
/// - literal/literal: the two canonical forms are compared for
///   equality.
/// - literal/glob (in either argument order): the literal's text
///   is run through the compiled glob matcher.
/// - glob/glob: delegated to the product-DFA emptiness check in
///   [`super::glob_intersect`].
///
/// Both patterns are assumed to live in the same coordinate
/// system (typically canonicalised workspace-absolute); this
/// function performs no canonicalisation of its own.
///
/// # Errors
///
/// Only the glob/glob case can fail: it forwards any
/// [`GlobIntersectError`] reported by the underlying DFA
/// construction or anchored-start lookup. The cases involving a
/// literal always return `Ok`.
pub fn intersects(lhs: &PathPattern, rhs: &PathPattern) -> Result<bool, GlobIntersectError> {
    // Settle the two same-kind pairings immediately; the mixed
    // pairing is normalised to `(literal, glob)` regardless of the
    // order the arguments arrived in.
    let (literal, glob) = match (lhs, rhs) {
        (PathPattern::Literal(left), PathPattern::Literal(right)) => {
            return Ok(left == right);
        }
        (PathPattern::Glob(left), PathPattern::Glob(right)) => {
            return glob_intersect_non_empty(&left.compile(), &right.compile());
        }
        (PathPattern::Literal(literal), PathPattern::Glob(glob))
        | (PathPattern::Glob(glob), PathPattern::Literal(literal)) => (literal, glob),
    };

    let compiled = glob.compile();
    let candidate = literal.to_string();
    let matcher = compiled.compile_matcher();
    Ok(matcher.is_match(candidate.as_str()))
}

#[cfg(test)]
mod tests {
    use super::*;
    use haz_domain::path::PathPatternError;
    use haz_query_lang::expr::Expr;
    use haz_query_lang::parser::parse;
    use haz_query_lang::span::Span;

    /// Build a [`RawAtom`] from its text and span bounds.
    fn raw(text: &str, start: usize, end: usize) -> RawAtom {
        let span = Span { start, end };
        RawAtom {
            text: text.to_owned(),
            span,
        }
    }

    /// Parse `text` as a path pattern, panicking if it is invalid.
    #[track_caller]
    fn pattern(text: &str) -> PathPattern {
        PathPattern::parse(text).unwrap()
    }

    /// Run [`intersects`] over two pattern texts, panicking on any
    /// parse or intersection error.
    #[track_caller]
    fn overlap(lhs: &str, rhs: &str) -> bool {
        intersects(&pattern(lhs), &pattern(rhs)).unwrap()
    }

    // --- Lifter happy paths -----------------------------------

    #[test]
    fn qry_003_lifts_literal_path_atom() {
        let lifted = parse_path_pattern_atom(raw("/lib/main.rs", 0, 12)).unwrap();
        assert!(lifted.is_literal());
    }

    #[test]
    fn qry_003_lifts_glob_path_atom() {
        let lifted = parse_path_pattern_atom(raw("src/**/*.rs", 0, 11)).unwrap();
        assert!(lifted.is_glob());
    }

    #[test]
    fn qry_003_lifts_workspace_absolute_glob() {
        let lifted = parse_path_pattern_atom(raw("/lib/src/*.rs", 0, 13)).unwrap();
        assert!(lifted.is_glob());
    }

    // --- Lifter error paths -----------------------------------

    #[test]
    fn qry_003_rejects_empty_path_pattern_atom() {
        let failure = parse_path_pattern_atom(raw("", 5, 5)).unwrap_err();
        match failure {
            AtomError::InvalidPathPattern { span, source } => {
                // The span of the offending atom must be carried through
                // unchanged alongside the domain-level cause.
                assert_eq!(span, Span { start: 5, end: 5 });
                assert!(matches!(source, PathPatternError::EmptyInput));
            }
            other => panic!("expected InvalidPathPattern, got {other:?}"),
        }
    }

    #[test]
    fn qry_003_rejects_double_star_adjacent_to_literal() {
        // PATH-011: `**` may only be a complete segment.
        let failure = parse_path_pattern_atom(raw("a**b/foo.rs", 2, 13)).unwrap_err();
        match failure {
            AtomError::InvalidPathPattern {
                span,
                source: PathPatternError::InvalidGlobSegment { .. },
            } => assert_eq!(span, Span { start: 2, end: 13 }),
            other => panic!("expected InvalidPathPattern, got {other:?}"),
        }
    }

    // --- End-to-end lift via try_map --------------------------

    #[test]
    fn qry_003_lifts_parsed_expression_to_typed_path_expression() {
        let parsed = parse("src/**/*.rs & !src/tests/**").unwrap();
        let typed = parsed.try_map(parse_path_pattern_atom).unwrap();

        let all_sources = Expr::Atom(pattern("src/**/*.rs"));
        let not_tests = Expr::Not(Box::new(Expr::Atom(pattern("src/tests/**"))));
        let expected = Expr::And(Box::new(all_sources), Box::new(not_tests));

        assert_eq!(typed, expected);
    }

    // --- intersects(): literal/literal arm --------------------

    #[test]
    fn qry_003_literal_literal_byte_equal_intersect() {
        assert!(overlap("/lib/main.rs", "/lib/main.rs"));
    }

    #[test]
    fn qry_003_literal_literal_byte_inequal_disjoint() {
        assert!(!overlap("/lib/main.rs", "/lib/lib.rs"));
    }

    // --- intersects(): literal/glob arm -----------------------

    #[test]
    fn qry_003_literal_inside_glob_matches() {
        assert!(overlap("/lib/src/main.rs", "/lib/src/*.rs"));
    }

    #[test]
    fn qry_003_literal_outside_glob_does_not_match() {
        assert!(!overlap("/lib/src/main.rs", "/web/src/*.rs"));
    }

    #[test]
    fn qry_003_glob_matches_literal_in_either_direction() {
        let glob_text = "/lib/**/*.rs";
        let literal_text = "/lib/src/deep/file.rs";
        assert!(overlap(glob_text, literal_text));
        assert!(overlap(literal_text, glob_text));
    }

    // --- intersects(): glob/glob arm (Q8.3) -------------------

    #[test]
    fn qry_003_glob_glob_overlapping_intersect() {
        assert!(overlap("/lib/src/**/*.rs", "/lib/src/foo/*.rs"));
    }

    #[test]
    fn qry_003_glob_glob_disjoint_prefixes_do_not_intersect() {
        assert!(!overlap("/lib/src/**/*.rs", "/web/src/**/*.rs"));
    }

    #[test]
    fn qry_003_glob_glob_disjoint_extensions_do_not_intersect() {
        assert!(!overlap("/lib/**/*.rs", "/lib/**/*.js"));
    }

    #[test]
    fn qry_003_glob_glob_alternation_overlap_intersect() {
        assert!(overlap("/lib/{a,b}.rs", "/lib/{b,c}.rs"));
    }

    #[test]
    fn qry_003_glob_glob_char_class_overlap_intersect() {
        assert!(overlap("/lib/[abc].rs", "/lib/[bcd].rs"));
    }

    #[test]
    fn qry_003_glob_glob_char_class_disjoint_do_not_intersect() {
        assert!(!overlap("/lib/[ab].rs", "/lib/[cd].rs"));
    }

    #[test]
    fn qry_003_glob_glob_double_star_absorbs_single_star_prefix() {
        assert!(overlap("/lib/**/*.rs", "/lib/*/*.rs"));
    }
}