// mle 0.28.0
//
// The markup link extractor (mle) extracts links from markup files (Markdown and HTML).
// Documentation
// SPDX-FileCopyrightText: 2022 - 2025 Robin Vobruba <hoijui.quaero@gmail.com>
// SPDX-FileCopyrightText: 2020 Armin Becher <becherarmin@gmail.com>
//
// SPDX-License-Identifier: AGPL-3.0-or-later

pub mod anchor;
// This module is public because we want to share parts of it
// with tools that depend on us as a library
// and reuse parts of our CLI, for example `mlc`.
pub mod cli;
pub mod config;
pub mod extractors;
pub mod ignore_link;
pub mod link;
pub mod markup;
pub mod result;
pub mod state;

use crate::anchor::Anchor;
use crate::link::Link;
pub use colored::*;
pub use config::Extractor as Config;
use git_version::git_version;
use state::State;
pub use wildmatch::WildMatch;

// TODO Get rid of these two, replacing them with something more idiomatic (thiserror or error_set?)
/// A heap-allocated, type-erased error
/// that can be sent and shared between threads (`Send + Sync`).
pub type BoxError = Box<dyn std::error::Error + Send + Sync>;
/// A `Result` whose error variant is a [`BoxError`].
pub type BoxResult<T> = Result<T, BoxError>;

// This runs the Rust code examples in the README as doc-tests:
// the README content is attached as the doc-string of this item,
// and the item only exists when compiling for doc-tests (`cfg(doctest)`),
// so it will not appear in the generated documentation.
#[doc = include_str!("../README.md")]
#[cfg(doctest)]
pub struct ReadmeDoctests;

/// The version of this software, determined at compile time
/// by the `git_version!` macro.
///
/// If no version can be determined, this is `"unknown"` (the `fallback`).
// NOTE(review): `cargo_prefix = ""` presumably makes the macro use
// the plain Cargo package version when no git information is available,
// before resorting to `fallback` - confirm against the `git_version` docs.
pub const VERSION: &str = git_version!(cargo_prefix = "", fallback = "unknown");

/// Extracts the links and anchors from all the markup files
/// listed in `conf.markup_files`.
///
/// A file that can not be converted into a [`markup::File`],
/// or whose links can not be gathered, does not abort the run.
/// Its error is recorded, and processing continues with the next file.
///
/// Returns a tuple of:
///
/// 1. all the links found,
/// 2. all the anchors found,
/// 3. all the errors that occurred, in the order the files were visited.
#[must_use]
pub async fn find_all_links(conf: &Config) -> (Vec<Link>, Vec<Anchor>, Vec<BoxError>) {
    let mut all_links = Vec::new();
    let mut all_anchors = Vec::new();
    let mut failures = Vec::new();

    for path in &conf.markup_files {
        // Files we fail to set up are recorded and skipped.
        let markup_file = match markup::File::try_from(path.clone()) {
            Ok(markup_file) => markup_file,
            Err(err) => {
                failures.push(err.into());
                continue;
            }
        };

        match extractors::gather_links(&markup_file, conf).await {
            Ok(mut gathered) => {
                all_links.append(&mut gathered.links);
                all_anchors.append(&mut gathered.anchors);
            }
            Err(err) => failures.push(err.into()),
        }
    }

    (all_links, all_anchors, failures)
}

/// Runs the markup link extractor.
/// This is the main entry point of this library.
///
/// It first gathers all links and anchors
/// (see [`find_all_links`]),
/// and then hands them - together with any errors
/// that occurred while gathering - to [`result::sink`].
///
/// # Errors
///
/// If [`result::sink`] fails.
/// Errors that occur while gathering the links are not returned directly;
/// they are passed on to the sink along with the results.
pub async fn run(state: &mut State) -> BoxResult<()> {
    let config = &state.config;
    let (links, anchors, errors) = find_all_links(&config.extractor).await;
    // TODO make this more stream-like, where each found link is directly sent to all output streams/files. See repvar code for how to do that.
    result::sink(config, &links, &anchors, &errors).await?;
    Ok(())
}