use super::*;
use serde_json::{json, Value};
use std::collections::HashSet;
use std::path::Path;
/// Ranks an artifact candidate's provenance: verified output beats
/// session writes, which beat pre-existing output; any other source
/// string ranks lowest (0).
fn artifact_candidate_source_priority(source: &str) -> i64 {
    const RANKED: [(&str, i64); 3] = [
        ("verified_output", 3),
        ("session_write", 2),
        ("preexisting_output", 1),
    ];
    RANKED
        .iter()
        .find(|(name, _)| *name == source)
        .map_or(0, |&(_, priority)| priority)
}
/// Expands one anchor string into the set of lowercase spellings that should
/// count as a mention of it inside free-form text.
///
/// Variants produced for a non-empty input:
/// - the trimmed, lowercased value itself;
/// - the value with every non-alphanumeric run collapsed to a single space;
/// - the value with every non-alphanumeric character removed;
/// - for path-like values (containing `/`): the file name and file stem,
///   plus each alphanumeric word inside them.
///
/// Returns an empty `Vec` for blank input. The order of the returned
/// variants is unspecified (they are collected from a `HashSet`).
fn normalized_anchor_variants(value: &str) -> Vec<String> {
    // Splits a string into its alphanumeric "words" by treating every
    // non-alphanumeric character as whitespace. Shared by the collapsed
    // form and the file-name/stem word extraction below.
    fn alnum_words(value: &str) -> Vec<String> {
        value
            .chars()
            .map(|ch| if ch.is_ascii_alphanumeric() { ch } else { ' ' })
            .collect::<String>()
            .split_whitespace()
            .map(str::to_string)
            .collect()
    }

    let trimmed = value.trim().to_ascii_lowercase();
    if trimmed.is_empty() {
        return Vec::new();
    }
    let mut variants = HashSet::new();
    variants.insert(trimmed.clone());

    // Space-collapsed form, e.g. "foo-bar.rs" -> "foo bar rs".
    let collapsed = alnum_words(&trimmed).join(" ");
    if !collapsed.is_empty() {
        variants.insert(collapsed);
    }

    // Compact form with all separators removed, e.g. "foo-bar.rs" -> "foobarrs".
    let compact: String = trimmed
        .chars()
        .filter(|ch| ch.is_ascii_alphanumeric())
        .collect();
    if !compact.is_empty() {
        variants.insert(compact);
    }

    // Path-like values also match on their file name / stem and the words
    // inside them, so "src/main.rs" is anchored by "main.rs" or "main".
    if trimmed.contains('/') {
        let path = Path::new(&trimmed);
        if let Some(file_name) = path.file_name().and_then(|value| value.to_str()) {
            variants.insert(file_name.to_ascii_lowercase());
            variants.extend(alnum_words(file_name));
        }
        if let Some(stem) = path.file_stem().and_then(|value| value.to_str()) {
            variants.insert(stem.to_ascii_lowercase());
            variants.extend(alnum_words(stem));
        }
    }
    variants.into_iter().collect()
}
/// Builds the sorted, deduplicated list of anchor variants for a source
/// reference (a workspace path or a URL).
///
/// Besides the variants of the full value, URL-ish inputs also contribute
/// variants for the host portion and for the final `/`-separated segment,
/// so both the domain and a trailing file name can anchor a match.
fn source_anchor_variants(source: &str) -> Vec<String> {
    let trimmed = source.trim();
    if trimmed.is_empty() {
        return Vec::new();
    }
    // Drop a leading http(s) scheme, if any, before slicing out host/segment.
    let without_scheme = ["https://", "http://"]
        .iter()
        .find_map(|scheme| trimmed.strip_prefix(scheme))
        .unwrap_or(trimmed);
    let host = without_scheme.split('/').next().unwrap_or(without_scheme);

    let mut variants = normalized_anchor_variants(trimmed);
    variants.extend(normalized_anchor_variants(host));
    if let Some(last_segment) = without_scheme.rsplit('/').next() {
        variants.extend(normalized_anchor_variants(last_segment));
    }
    variants.sort();
    variants.dedup();
    variants
}
/// Number of distinct evidence anchors a response is expected to contain,
/// capped at two: zero sources -> 0, one source -> 1, more -> 2.
///
/// Sources from both lists are trimmed, lowercased, and deduplicated;
/// blank entries are ignored.
pub(crate) fn source_evidence_anchor_target(read_paths: &[String], citations: &[String]) -> usize {
    let mut unique_sources = HashSet::new();
    for source in read_paths.iter().chain(citations) {
        let normalized = source.trim().to_ascii_lowercase();
        if !normalized.is_empty() {
            unique_sources.insert(normalized);
        }
    }
    unique_sources.len().min(2)
}
/// Counts how many distinct sources (read paths or citations) are mentioned
/// somewhere in `text`, matching case-insensitively against each source's
/// anchor variants.
///
/// Each source counts at most once, keyed by its trimmed, lowercased
/// spelling; blank sources are skipped.
pub(crate) fn evidence_anchor_count(
    text: &str,
    read_paths: &[String],
    citations: &[String],
) -> usize {
    let haystack = text.to_ascii_lowercase();
    read_paths
        .iter()
        .chain(citations)
        .map(|source| source.trim())
        .filter(|source| !source.is_empty())
        .filter(|source| {
            // A source is anchored if any of its variants appears in the text.
            source_anchor_variants(source)
                .iter()
                .any(|variant| !variant.is_empty() && haystack.contains(variant.as_str()))
        })
        .map(str::to_ascii_lowercase)
        .collect::<HashSet<_>>()
        .len()
}
/// Scores one artifact candidate and packages every feature that went into
/// the score into an `ArtifactCandidateAssessment`.
///
/// The score rewards provenance, length (capped), markdown structure,
/// required sections, evidence anchors, citations, and review sections
/// backed by actual reads; it penalizes relevant paths that were neither
/// read nor explicitly listed as skipped, placeholder-looking text, and
/// empty text.
pub(crate) fn assess_artifact_candidate(
    node: &AutomationFlowNode,
    workspace_root: &str,
    source: &str,
    text: &str,
    read_paths: &[String],
    discovered_relevant_paths: &[String],
    upstream_read_paths: &[String],
    upstream_citations: &[String],
) -> ArtifactCandidateAssessment {
    let trimmed = text.trim();
    let length = trimmed.len();

    // Structural features of the candidate text.
    let placeholder_like = placeholder_like_artifact_text(trimmed);
    let substantive = substantive_artifact_text(trimmed);
    let heading_count = markdown_heading_count(trimmed);
    let list_count = markdown_list_item_count(trimmed);
    let paragraph_count = paragraph_block_count(trimmed);
    let required_section_count = artifact_required_section_count(node, trimmed);
    let files_reviewed_present = files_reviewed_section_lists_paths(trimmed);
    let citation_count = markdown_citation_count(trimmed);
    let web_sources_reviewed_present = web_sources_reviewed_section_lists_sources(trimmed);

    // Pulls the paths listed under a named markdown section, normalized
    // to workspace-relative display form; unparseable entries are dropped.
    let section_paths = |section: &str| -> Vec<String> {
        extract_markdown_section_paths(trimmed, section)
            .into_iter()
            .filter_map(|value| normalize_workspace_display_path(workspace_root, &value))
            .collect()
    };
    let reviewed_paths = section_paths("Files reviewed");
    let files_not_reviewed = section_paths("Files not reviewed");

    // Reviewed paths that are corroborated by an actual read.
    let reviewed_paths_backed_by_read: Vec<String> = reviewed_paths
        .iter()
        .filter(|path| read_paths.contains(*path))
        .cloned()
        .collect();

    // Fall back to the self-reported review list when discovery found nothing.
    let effective_relevant_paths: Vec<String> = if discovered_relevant_paths.is_empty() {
        reviewed_paths.clone()
    } else {
        discovered_relevant_paths.to_vec()
    };
    let evidence_anchor_count =
        evidence_anchor_count(trimmed, upstream_read_paths, upstream_citations);

    // Relevant paths that were neither read nor explicitly marked skipped.
    let unreviewed_relevant_paths: Vec<String> = effective_relevant_paths
        .iter()
        .filter(|path| !read_paths.contains(*path) && !files_not_reviewed.contains(*path))
        .cloned()
        .collect();

    // Additive bonuses first, then conditional bonuses and penalties.
    let mut score = artifact_candidate_source_priority(source) * 25
        + (length.min(12_000) / 24) as i64
        + (heading_count as i64) * 60
        + (list_count as i64) * 18
        + (paragraph_count as i64) * 24
        + (required_section_count as i64) * 160
        + (evidence_anchor_count.min(5) as i64) * 120
        + (citation_count.min(8) as i64) * 45;
    if substantive {
        score += 2_000;
    }
    if files_reviewed_present {
        score += 180;
    }
    if web_sources_reviewed_present {
        score += 140;
    }
    // Full credit when every reviewed path is backed by a read; partial
    // credit when at least one is.
    score += if !reviewed_paths.is_empty()
        && reviewed_paths.len() == reviewed_paths_backed_by_read.len()
    {
        260
    } else if !reviewed_paths_backed_by_read.is_empty() {
        90
    } else {
        0
    };
    score -= (unreviewed_relevant_paths.len() as i64) * 220;
    if placeholder_like {
        score -= 450;
    }
    if trimmed.is_empty() {
        score -= 2_000;
    }

    ArtifactCandidateAssessment {
        source: source.to_string(),
        text: text.to_string(),
        length,
        score,
        substantive,
        placeholder_like,
        heading_count,
        list_count,
        paragraph_count,
        required_section_count,
        files_reviewed_present,
        reviewed_paths,
        reviewed_paths_backed_by_read,
        unreviewed_relevant_paths,
        citation_count,
        web_sources_reviewed_present,
        evidence_anchor_count,
    }
}
/// Picks the highest-ranked candidate, if any, cloning only the winner.
///
/// Ranking compares score first, then substantiveness, required-section
/// count, evidence anchors, heading count, length, and finally source
/// priority as the last tie-breaker. On a complete tie the later candidate
/// wins (`max_by` keeps the last of equal elements).
///
/// Note: comparison is done on references; the previous implementation
/// cloned every candidate (including its full artifact text) before
/// comparing them.
pub(crate) fn best_artifact_candidate(
    candidates: &[ArtifactCandidateAssessment],
) -> Option<ArtifactCandidateAssessment> {
    candidates
        .iter()
        .max_by(|left, right| {
            left.score
                .cmp(&right.score)
                .then(left.substantive.cmp(&right.substantive))
                .then(
                    left.required_section_count
                        .cmp(&right.required_section_count),
                )
                .then(left.evidence_anchor_count.cmp(&right.evidence_anchor_count))
                .then(left.heading_count.cmp(&right.heading_count))
                .then(left.length.cmp(&right.length))
                // `then_with` defers the priority lookups until every other
                // criterion has tied, instead of computing them eagerly for
                // each comparison.
                .then_with(|| {
                    artifact_candidate_source_priority(&left.source)
                        .cmp(&artifact_candidate_source_priority(&right.source))
                })
        })
        .cloned()
}
/// Serializes a candidate assessment into a JSON diagnostics record.
///
/// Mirrors the scoring-relevant fields of `ArtifactCandidateAssessment`
/// (the artifact text itself and the raw `reviewed_paths` list are
/// deliberately omitted), plus the caller-supplied `accepted` flag that
/// records whether this candidate was ultimately chosen.
pub(crate) fn artifact_candidate_summary(
    candidate: &ArtifactCandidateAssessment,
    accepted: bool,
) -> Value {
    json!({
        "source": candidate.source,
        "length": candidate.length,
        "score": candidate.score,
        "substantive": candidate.substantive,
        "placeholder_like": candidate.placeholder_like,
        "heading_count": candidate.heading_count,
        "list_count": candidate.list_count,
        "paragraph_count": candidate.paragraph_count,
        "required_section_count": candidate.required_section_count,
        "files_reviewed_present": candidate.files_reviewed_present,
        "reviewed_paths_backed_by_read": candidate.reviewed_paths_backed_by_read,
        "unreviewed_relevant_paths": candidate.unreviewed_relevant_paths,
        "citation_count": candidate.citation_count,
        "web_sources_reviewed_present": candidate.web_sources_reviewed_present,
        "evidence_anchor_count": candidate.evidence_anchor_count,
        "accepted": accepted,
    })
}