cartulary 0.3.0-alpha.1

The knowledge layer of your project — decisions, issues, docs, all in one place.
Documentation
//! Markdown link extraction.
//!
//! Used by `check_issues` to find link/image targets inside an issue's
//! `index.md` and companion files. Only relative paths are returned; URLs,
//! pure `#anchor` links, and parent-directory references are filtered out.

use pulldown_cmark::{Event, Parser, Tag};

/// Collect the relative link and image destinations found in `markdown`.
///
/// Every `Link` and `Image` start tag emitted by the parser has its
/// destination passed through `filter_target`, which decides whether the
/// destination is kept and in what form:
/// - URLs (anything containing `://` or starting with `mailto:`) are dropped
/// - Pure anchor links (starting with `#`) are dropped
/// - Parent-directory references (`..`) are dropped (out of scope)
/// - A trailing `#anchor` is removed from otherwise-relative paths, so
///   `[link](plan.md#section)` yields `plan.md`.
///
/// The result preserves document order and keeps duplicates; callers that
/// need unique targets must dedup themselves.
pub fn extract_relative_targets(markdown: &str) -> Vec<String> {
    Parser::new(markdown)
        .filter_map(|event| match event {
            // Links and images both carry their destination in `dest_url`.
            Event::Start(Tag::Link { dest_url, .. })
            | Event::Start(Tag::Image { dest_url, .. }) => filter_target(dest_url.as_ref()),
            _ => None,
        })
        .collect()
}

/// Decide whether a raw link destination is a relative path worth reporting.
///
/// Returns `None` for destinations that are empty, pure anchors (`#...`),
/// URLs (`mailto:` prefix or anything containing `://`), or paths with a
/// parent-directory segment (`..`). Otherwise returns the path with any
/// `#fragment` removed and a single leading `./` stripped.
///
/// The parent-directory test is done per path segment, after the fragment is
/// removed. File names that merely contain consecutive dots (`notes..v2.md`)
/// and fragments that contain `..` (`plan.md#a..b`) are therefore still
/// treated as ordinary relative targets.
fn filter_target(raw: &str) -> Option<String> {
    let trimmed = raw.trim();
    if trimmed.is_empty() || trimmed.starts_with('#') {
        return None;
    }
    if trimmed.starts_with("mailto:") || trimmed.contains("://") {
        return None;
    }

    // Drop the fragment before inspecting the path so that the anchor text
    // cannot influence whether the target is accepted.
    let path = trimmed.split_once('#').map_or(trimmed, |(path, _anchor)| path);

    // A parent-directory reference is a whole segment equal to `..`, not any
    // occurrence of two dots. Backslashes are treated as separators as well,
    // so `..\other` keeps being rejected.
    let escapes_directory = path
        .split(|c: char| c == '/' || c == '\\')
        .any(|segment| segment == "..");
    if escapes_directory {
        return None;
    }

    // CommonMark allows `./file.md` as an explicit current-directory reference;
    // strip the prefix so callers can match against bare filenames.
    let path = path.strip_prefix("./").unwrap_or(path);
    if path.is_empty() {
        return None;
    }
    Some(path.to_string())
}

/// Unit tests for `extract_relative_targets`, driven through real markdown
/// snippets so that parsing and target filtering are exercised together.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn extracts_relative_link_target() {
        let md = "See [the plan](plan.md).";
        assert_eq!(extract_relative_targets(md), vec!["plan.md"]);
    }

    #[test]
    fn extracts_image_target() {
        let md = "![mockup](mockup.png)";
        assert_eq!(extract_relative_targets(md), vec!["mockup.png"]);
    }

    #[test]
    fn extracts_multiple_links_in_order() {
        let md = "Read [a](a.md), then [b](b.md), then ![c](c.png).";
        assert_eq!(extract_relative_targets(md), vec!["a.md", "b.md", "c.png"]);
    }

    #[test]
    fn skips_http_urls() {
        let md = "See [docs](https://example.com).";
        assert!(extract_relative_targets(md).is_empty());
    }

    #[test]
    fn skips_mailto_links() {
        let md = "Email [me](mailto:me@example.com).";
        assert!(extract_relative_targets(md).is_empty());
    }

    #[test]
    fn skips_pure_anchor_links() {
        let md = "Jump to [conclusion](#conclusion).";
        assert!(extract_relative_targets(md).is_empty());
    }

    #[test]
    fn strips_anchor_from_relative_path() {
        let md = "See [section](plan.md#phase-2).";
        assert_eq!(extract_relative_targets(md), vec!["plan.md"]);
    }

    #[test]
    fn skips_parent_directory_refs() {
        // Out of scope for intra-issue validation.
        let md = "See [other](../0042-foo/index.md).";
        assert!(extract_relative_targets(md).is_empty());
    }

    #[test]
    fn empty_target_is_ignored() {
        let md = "[empty]()";
        assert!(extract_relative_targets(md).is_empty());
    }

    #[test]
    fn empty_after_stripping_anchor_is_ignored() {
        // `./#section` does not start with `#`, so it gets past the
        // pure-anchor check and is only rejected once the `./` prefix and the
        // fragment have been removed, leaving an empty path. A bare
        // `#section` would never reach that branch (see
        // `skips_pure_anchor_links`).
        let md = "[anchor only](./#section)";
        assert!(extract_relative_targets(md).is_empty());
    }

    #[test]
    fn duplicate_targets_are_preserved() {
        let md = "See [a](plan.md) and [b](plan.md).";
        assert_eq!(extract_relative_targets(md), vec!["plan.md", "plan.md"]);
    }

    #[test]
    fn dot_slash_prefix_is_stripped() {
        // CommonMark allows `./companion.md` for an explicit current-directory
        // reference; treat it as equivalent to the bare filename so the link
        // checker matches against the file set built from bare names.
        let md = "See [the design](./design-decision.md).";
        assert_eq!(extract_relative_targets(md), vec!["design-decision.md"]);
    }

    #[test]
    fn dot_slash_alone_is_ignored() {
        // Nothing is left once the `./` prefix is removed.
        let md = "[empty](./)";
        assert!(extract_relative_targets(md).is_empty());
    }

    #[test]
    fn dot_slash_with_anchor_is_normalized() {
        let md = "See [section](./plan.md#phase-2).";
        assert_eq!(extract_relative_targets(md), vec!["plan.md"]);
    }
}