homeboy 0.44.3

CLI for multi-component deployment and development workflow automation
Documentation
//! Core parsing primitives for text extraction and validation.
//!
//! This module provides the foundational layer for extracting structured data
//! from text content. All parsing operations in homeboy (versions, changelogs,
//! command output, git tags) are built on these primitives.

use crate::error::{Error, Result};
use regex::Regex;
use std::collections::BTreeSet;
use std::hash::Hash;
use std::path::PathBuf;

/// Extract first match from content using regex pattern with capture group.
/// Pattern must contain exactly one capture group for the value to extract.
/// Content is trimmed before matching.
pub fn extract_first(content: &str, pattern: &str) -> Option<String> {
    let re = Regex::new(pattern).ok()?;
    re.captures(content.trim())
        .and_then(|caps| caps.get(1))
        .map(|m| m.as_str().to_string())
}

/// Extract all matches from content using regex pattern with capture group.
/// Returns empty Vec if pattern is invalid, None only on regex compile error.
pub fn extract_all(content: &str, pattern: &str) -> Option<Vec<String>> {
    let re = Regex::new(pattern).ok()?;
    let matches: Vec<String> = re
        .captures_iter(content.trim())
        .filter_map(|caps| caps.get(1).map(|m| m.as_str().to_string()))
        .collect();
    Some(matches)
}

/// Replace all matches of capture group with new value.
/// Returns (new_content, replacement_count).
/// Content is trimmed before matching to stay consistent with extract_all,
/// but trailing newline is preserved in output.
pub fn replace_all(content: &str, pattern: &str, replacement: &str) -> Option<(String, usize)> {
    let re = Regex::new(pattern).ok()?;
    let mut count = 0usize;
    let had_trailing_newline = content.ends_with('\n');
    let trimmed = content.trim();

    let replaced = re
        .replace_all(trimmed, |caps: &regex::Captures| {
            count += 1;
            let full_match = caps.get(0).map(|m| m.as_str()).unwrap_or("");
            let captured = caps.get(1).map(|m| m.as_str()).unwrap_or("");
            full_match.replacen(captured, replacement, 1)
        })
        .to_string();

    let result = if had_trailing_newline && !replaced.ends_with('\n') {
        format!("{}\n", replaced)
    } else {
        replaced
    };

    Some((result, count))
}

/// Validate all extracted values are identical, return the canonical value.
/// Used for version consistency checks across multiple files.
pub fn require_identical<T>(values: &[T], context: &str) -> Result<T>
where
    T: Clone + Eq + Hash + std::fmt::Display + Ord,
{
    if values.is_empty() {
        return Err(Error::internal_unexpected(format!(
            "No values found in {}",
            context
        )));
    }

    let unique: BTreeSet<&T> = values.iter().collect();
    if unique.len() != 1 {
        let items: Vec<String> = unique.iter().map(|v| v.to_string()).collect();
        return Err(Error::internal_unexpected(format!(
            "Multiple different values found in {}: {}",
            context,
            items.join(", ")
        )));
    }

    Ok(values[0].clone())
}

/// Extract all matches and validate they are identical.
/// Combines extract_all + require_identical for the common pattern.
pub fn extract_unique(content: &str, pattern: &str, context: &str) -> Result<String> {
    let values = extract_all(content, pattern).ok_or_else(|| {
        Error::validation_invalid_argument(
            "pattern",
            format!("Invalid regex pattern: {}", pattern),
            None,
            None,
        )
    })?;

    if values.is_empty() {
        return Err(Error::internal_unexpected(format!(
            "No matches found in {} using pattern: {}",
            context, pattern
        )));
    }

    require_identical(&values, context)
}

/// Parse output into non-empty lines.
pub fn lines(output: &str) -> impl Iterator<Item = &str> {
    output.lines().filter(|line| !line.is_empty())
}

/// Convert content into a Vec of owned line strings.
///
/// Replaces the common pattern:
/// ```ignore
/// content.lines().map(|s| s.to_string()).collect()
/// ```
pub fn lines_to_vec(content: &str) -> Vec<String> {
    content.lines().map(|s| s.to_string()).collect()
}

/// Parse output into lines with custom filter.
pub fn lines_filtered<'a, F>(output: &'a str, filter: F) -> impl Iterator<Item = &'a str>
where
    F: Fn(&str) -> bool + 'a,
{
    output
        .lines()
        .filter(move |line| !line.is_empty() && filter(line))
}

/// Parse line by splitting on whitespace, returning parts if expected count met.
pub fn split_whitespace(line: &str, min_parts: usize) -> Option<Vec<&str>> {
    let parts: Vec<&str> = line.split_whitespace().collect();
    if parts.len() >= min_parts {
        Some(parts)
    } else {
        None
    }
}

/// Resolve path that may be absolute or relative to base.
pub fn resolve_path(base: &str, file: &str) -> PathBuf {
    if file.starts_with('/') {
        PathBuf::from(file)
    } else {
        PathBuf::from(base).join(file)
    }
}

/// Resolve path and return as String.
pub fn resolve_path_string(base: &str, file: &str) -> String {
    resolve_path(base, file).to_string_lossy().to_string()
}

/// Deduplicate preserving first occurrence order.
pub fn dedupe<T>(items: Vec<T>) -> Vec<T>
where
    T: Clone + Eq + Hash,
{
    let mut seen = std::collections::HashSet::new();
    items
        .into_iter()
        .filter(|item| seen.insert(item.clone()))
        .collect()
}

/// Parse a potentially combined project:subtarget identifier.
///
/// Splits on the first `:` only, allowing subtargets with colons.
/// Both parts are trimmed.
pub fn split_identifier(identifier: &str) -> (&str, Option<&str>) {
    match identifier.split_once(':') {
        Some((project, subtarget)) => {
            let project = project.trim();
            let subtarget = subtarget.trim();
            if subtarget.is_empty() {
                (project, None)
            } else {
                (project, Some(subtarget))
            }
        }
        None => (identifier.trim(), None),
    }
}

/// Extract a string value from a nested JSON path.
///
/// Traverses the JSON object using the provided path segments and returns
/// the final value as a string if it exists.
///
/// # Example
/// ```ignore
/// let json = serde_json::json!({"release": {"local_path": "/path/to/file"}});
/// let path = json_path_str(&json, &["release", "local_path"]);
/// assert_eq!(path, Some("/path/to/file"));
/// ```
pub fn json_path_str<'a>(json: &'a serde_json::Value, path: &[&str]) -> Option<&'a str> {
    let mut current = json;
    for part in path {
        current = current.get(part)?;
    }
    current.as_str()
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn extract_first_finds_version() {
        let content = r#"Version: 1.2.3"#;
        let pattern = r"Version:\s*(\d+\.\d+\.\d+)";
        assert_eq!(extract_first(content, pattern), Some("1.2.3".to_string()));
    }

    #[test]
    fn extract_first_returns_none_on_no_match() {
        let content = "no version here";
        let pattern = r"Version:\s*(\d+\.\d+\.\d+)";
        assert_eq!(extract_first(content, pattern), None);
    }

    #[test]
    fn extract_all_finds_multiple() {
        let content = "v1.0.0 and v2.0.0";
        let pattern = r"v(\d+\.\d+\.\d+)";
        let result = extract_all(content, pattern).unwrap();
        assert_eq!(result, vec!["1.0.0", "2.0.0"]);
    }

    #[test]
    fn replace_all_counts_replacements() {
        let content = "v1.0.0 and v1.0.0";
        let pattern = r"v(\d+\.\d+\.\d+)";
        let (replaced, count) = replace_all(content, pattern, "2.0.0").unwrap();
        assert_eq!(replaced, "v2.0.0 and v2.0.0");
        assert_eq!(count, 2);
    }

    #[test]
    fn require_identical_passes_single_value() {
        let values = vec!["1.0.0".to_string()];
        assert_eq!(
            require_identical(&values, "test").unwrap(),
            "1.0.0".to_string()
        );
    }

    #[test]
    fn require_identical_passes_duplicates() {
        let values = vec!["1.0.0".to_string(), "1.0.0".to_string()];
        assert_eq!(
            require_identical(&values, "test").unwrap(),
            "1.0.0".to_string()
        );
    }

    #[test]
    fn require_identical_fails_on_different() {
        let values = vec!["1.0.0".to_string(), "2.0.0".to_string()];
        assert!(require_identical(&values, "test").is_err());
    }

    #[test]
    fn lines_filters_empty() {
        let output = "line1\n\nline2\n";
        let result: Vec<&str> = lines(output).collect();
        assert_eq!(result, vec!["line1", "line2"]);
    }

    #[test]
    fn resolve_path_handles_absolute() {
        let result = resolve_path_string("/base", "/absolute/path");
        assert_eq!(result, "/absolute/path");
    }

    #[test]
    fn resolve_path_handles_relative() {
        let result = resolve_path_string("/base", "relative/path");
        assert_eq!(result, "/base/relative/path");
    }

    #[test]
    fn dedupe_preserves_order() {
        let items = vec!["a", "b", "a", "c", "b"];
        let result = dedupe(items);
        assert_eq!(result, vec!["a", "b", "c"]);
    }

    #[test]
    fn lines_to_vec_splits_correctly() {
        let content = "line1\nline2\nline3";
        let result = lines_to_vec(content);
        assert_eq!(result, vec!["line1", "line2", "line3"]);
    }

    #[test]
    fn lines_to_vec_preserves_empty_lines() {
        let content = "line1\n\nline3";
        let result = lines_to_vec(content);
        assert_eq!(result, vec!["line1", "", "line3"]);
    }

    #[test]
    fn json_path_str_extracts_nested_value() {
        let json = serde_json::json!({"release": {"local_path": "/path/to/file"}});
        assert_eq!(
            json_path_str(&json, &["release", "local_path"]),
            Some("/path/to/file")
        );
    }

    #[test]
    fn json_path_str_returns_none_for_missing_path() {
        let json = serde_json::json!({"release": {"version": "1.0.0"}});
        assert_eq!(json_path_str(&json, &["release", "local_path"]), None);
    }

    #[test]
    fn json_path_str_returns_none_for_non_string() {
        let json = serde_json::json!({"count": 42});
        assert_eq!(json_path_str(&json, &["count"]), None);
    }

    #[test]
    fn json_path_str_handles_single_level() {
        let json = serde_json::json!({"name": "test"});
        assert_eq!(json_path_str(&json, &["name"]), Some("test"));
    }

    #[test]
    fn json_path_str_handles_deep_nesting() {
        let json = serde_json::json!({"a": {"b": {"c": {"d": "value"}}}});
        assert_eq!(json_path_str(&json, &["a", "b", "c", "d"]), Some("value"));
    }

    #[test]
    fn split_identifier_parses_project_subtarget() {
        assert_eq!(
            split_identifier("extra-chill:events"),
            ("extra-chill", Some("events"))
        );
    }

    #[test]
    fn split_identifier_handles_project_only() {
        assert_eq!(split_identifier("extra-chill"), ("extra-chill", None));
    }

    #[test]
    fn split_identifier_preserves_subtarget_colons() {
        assert_eq!(
            split_identifier("project:sub:target"),
            ("project", Some("sub:target"))
        );
    }

    #[test]
    fn split_identifier_treats_empty_subtarget_as_none() {
        assert_eq!(split_identifier("project:"), ("project", None));
    }

    #[test]
    fn split_identifier_handles_empty_project() {
        assert_eq!(split_identifier(":subtarget"), ("", Some("subtarget")));
    }

    #[test]
    fn split_identifier_trims_whitespace() {
        assert_eq!(
            split_identifier("project : subtarget"),
            ("project", Some("subtarget"))
        );
    }
}