wcl 0.10.0-alpha

//! Shared parsing/utilities for `wcl set`, `wcl remove`, and `wcl add`.
//!
//! Spec grammar (set, remove):
//!     <selector>  [ '~>' <action> ]
//!
//! Action grammar:
//!   set:    <path> '=' <wcl-expression>
//!   remove: <path>      (or empty — selector-only removes whole block)
//!
//! Assignment grammar (add only):
//!     <path> '=' <wcl-expression>
//!
//! Path grammar:
//!     ('.' segment)+
//!     segment := ident ( '#' id )?
//!     ident   := [A-Za-z0-9_]+
//!     id      := [A-Za-z0-9_-]+

use std::collections::HashMap;
use std::path::Path;

use crate::cli::LibraryArgs;
use crate::eval::{BlockRef, Evaluator, QueryEngine, ScopeId, ScopeKind, Value};
use crate::lang::ast::{Block, BodyItem, DocItem, Document, InlineId};
use crate::lang::Span;

/// One `.name` or `.name#id` step of an [`AssignmentPath`].
#[derive(Debug, Clone)]
pub struct PathSegment {
    /// Identifier that follows the `.`.
    pub name: String,
    /// Identifier that follows the `#`, when the segment has one.
    pub id: Option<String>,
}

/// A dotted path such as `.foo.bar#id.baz`, stored as its segments in source order.
///
/// `parse_path` never produces an empty path; `last` panics if `segments` is empty.
#[derive(Debug, Clone)]
pub struct AssignmentPath {
    /// Path segments, first (outermost) to last.
    pub segments: Vec<PathSegment>,
}

impl AssignmentPath {
    /// Returns the final segment of the path.
    ///
    /// # Panics
    ///
    /// Panics with `"non-empty path"` when `segments` is empty.
    pub fn last(&self) -> &PathSegment {
        match self.segments.as_slice() {
            [.., tail] => tail,
            [] => panic!("non-empty path"),
        }
    }
}

/// Split a `set`/`remove` spec into a selector string and an optional action string.
///
/// `<selector> ~> <action>`  →  (selector, Some(action))
/// `<selector>`              →  (selector, None)
///
/// The split happens at the *first* `~>` that is not inside a string literal
/// (delimited by `"` or `'`, with `\` escaping the following byte), so users
/// can embed `~>` in quoted text inside selector filter expressions.
///
/// Both returned slices are trimmed of surrounding whitespace on both sides,
/// whether or not a `~>` was found. An action that is empty after trimming is
/// returned as `Some("")`.
pub fn split_spec(spec: &str) -> (&str, Option<&str>) {
    let bytes = spec.as_bytes();
    // `Some(q)` while inside a string literal opened by quote byte `q`.
    let mut quote: Option<u8> = None;
    let mut i = 0;
    // A `~>` needs two bytes, so the last byte never has to be inspected.
    while i + 1 < bytes.len() {
        let b = bytes[i];
        match quote {
            // Skip the escaped byte so an escaped quote does not end the literal.
            Some(_) if b == b'\\' => {
                i += 2;
                continue;
            }
            Some(q) if b == q => quote = None,
            Some(_) => {}
            None if b == b'"' || b == b'\'' => quote = Some(b),
            None if b == b'~' && bytes[i + 1] == b'>' => {
                // `~` and `>` are ASCII, so both slice bounds are char boundaries.
                return (spec[..i].trim(), Some(spec[i + 2..].trim()));
            }
            None => {}
        }
        i += 1;
    }
    (spec.trim(), None)
}

/// Parse a leading path such as `.foo`, `.foo.bar.baz` or `.foo#id.bar`.
///
/// Leading ASCII whitespace is skipped. Each segment is a `.` followed by a
/// name made of ASCII alphanumerics and `_`, optionally followed by `#` and an
/// id that may additionally contain `-`. Parsing stops at the first byte that
/// cannot continue the path.
///
/// On success returns `(path, bytes_consumed)`, where `bytes_consumed` is the
/// byte offset in `input` just past the last segment.
///
/// # Errors
///
/// Returns a message when a `.` or `#` is not followed by an identifier, or
/// when the input does not start (after whitespace) with a `.` segment.
pub fn parse_path(input: &str) -> Result<(AssignmentPath, usize), String> {
    fn is_name_byte(b: u8) -> bool {
        b.is_ascii_alphanumeric() || b == b'_'
    }
    fn is_id_byte(b: u8) -> bool {
        is_name_byte(b) || b == b'-'
    }
    /// Length of the longest prefix of `bytes` whose bytes all satisfy `accept`.
    fn run_len(bytes: &[u8], accept: impl Fn(u8) -> bool) -> usize {
        bytes.iter().take_while(|&&b| accept(b)).count()
    }

    let bytes = input.as_bytes();
    let mut pos = run_len(bytes, |b| b.is_ascii_whitespace());
    let mut segments = Vec::new();

    while bytes.get(pos) == Some(&b'.') {
        pos += 1;

        let name_len = run_len(&bytes[pos..], is_name_byte);
        if name_len == 0 {
            return Err(format!("expected identifier after '.' at offset {}", pos));
        }
        let name = input[pos..pos + name_len].to_string();
        pos += name_len;

        let id = if bytes.get(pos) == Some(&b'#') {
            pos += 1;
            let id_len = run_len(&bytes[pos..], is_id_byte);
            if id_len == 0 {
                return Err(format!("expected identifier after '#' at offset {}", pos));
            }
            let text = input[pos..pos + id_len].to_string();
            pos += id_len;
            Some(text)
        } else {
            None
        };

        segments.push(PathSegment { name, id });
    }

    if segments.is_empty() {
        return Err("expected a path starting with '.'".to_string());
    }

    Ok((AssignmentPath { segments }, pos))
}

/// Parse `<path> = <wcl-expression>` into the path and the raw right-hand-side text.
///
/// The right-hand side is everything after the first `=` following the path,
/// trimmed of surrounding whitespace.
///
/// # Errors
///
/// Returns a message when the path is malformed, the `=` is missing, the
/// right-hand side is empty, or the right-hand side does not parse as a WCL
/// expression.
pub fn parse_assignment(input: &str) -> Result<(AssignmentPath, String), String> {
    let (path, consumed) = parse_path(input)?;

    // Only ASCII whitespace may separate the path from '='.
    let after_path = input[consumed..].trim_start_matches(|c: char| c.is_ascii_whitespace());
    let rhs = match after_path.strip_prefix('=') {
        Some(rest) => rest.trim().to_string(),
        None => return Err("expected '=' after path".to_string()),
    };
    if rhs.is_empty() {
        return Err("expected expression after '='".to_string());
    }

    // The expression is parsed purely as validation: the AST is thrown away and
    // the source text is what gets written back into the file.
    if let Err(diags) = crate::lang::parse_expression(&rhs, crate::FileId(0)) {
        let joined = diags
            .into_diagnostics()
            .into_iter()
            .map(|diag| diag.message)
            .collect::<Vec<String>>()
            .join("; ");
        return Err(format!("invalid expression: {}", joined));
    }

    Ok((path, rhs))
}

/// Parse a bare path with nothing after it (used by `wcl remove`'s action).
///
/// # Errors
///
/// Returns a message when the path is malformed or when anything other than
/// whitespace follows it.
pub fn parse_path_only(input: &str) -> Result<AssignmentPath, String> {
    parse_path(input).and_then(|(path, consumed)| match input[consumed..].trim() {
        "" => Ok(path),
        trailing => Err(format!("unexpected text after path: '{}'", trailing)),
    })
}

/// Index every block reachable from the document's top-level block items by
/// its AST span.
///
/// Only top-level items of the form `DocItem::Body(BodyItem::Block(_))` are
/// entered; their nested blocks are added recursively by `visit_block`.
pub fn build_span_to_block(doc: &Document) -> HashMap<Span, &Block> {
    let mut by_span = HashMap::new();
    let top_level_blocks = doc.items.iter().filter_map(|item| match item {
        DocItem::Body(BodyItem::Block(block)) => Some(block),
        _ => None,
    });
    for block in top_level_blocks {
        visit_block(block, &mut by_span);
    }
    by_span
}

/// Record `block` under its span, then do the same for every block nested in
/// its body (parent first, children in body order).
fn visit_block<'a>(block: &'a Block, map: &mut HashMap<Span, &'a Block>) {
    map.insert(block.span, block);
    let nested = block.body.iter().filter_map(|item| match item {
        BodyItem::Block(child) => Some(child),
        _ => None,
    });
    for child in nested {
        visit_block(child, map);
    }
}

/// Returns the block's inline id when it is an `InlineId::Literal`; any other
/// form of inline id, or none at all, yields `None`.
pub fn block_inline_id(block: &Block) -> Option<&str> {
    if let Some(InlineId::Literal(literal)) = &block.inline_id {
        return Some(&literal.value);
    }
    None
}

/// Parse `selector_str` as a query pipeline and run it over the block
/// references found in the document's evaluated values.
///
/// Working from the evaluated values keeps proper attribute values on the
/// matched blocks (unlike `Document::query`, which re-evaluates against an
/// empty scope and drops attributes that depend on let bindings).
///
/// A list result is returned as its items; any other result is returned as a
/// one-element vector.
///
/// # Errors
///
/// Returns a message when the selector fails to parse or the query engine
/// reports an error.
pub fn run_selector(doc: &crate::Document, selector_str: &str) -> Result<Vec<Value>, String> {
    let pipeline = match crate::lang::parse_query(selector_str, crate::FileId(0)) {
        Ok(parsed) => parsed,
        Err(diags) => {
            let joined = diags
                .into_diagnostics()
                .into_iter()
                .map(|diag| diag.message)
                .collect::<Vec<String>>()
                .join("; ");
            return Err(format!("selector parse error: {}", joined));
        }
    };

    // Gather the BlockRefs held directly in doc.values, including those inside
    // list values, as the candidate set for the query engine.
    let mut candidates: Vec<BlockRef> = Vec::new();
    for value in doc.values.values() {
        collect_block_refs_from_value(value, &mut candidates);
    }

    let mut evaluator = Evaluator::new();
    let scope: ScopeId = evaluator.scopes_mut().create_scope(ScopeKind::Module, None);
    let engine = QueryEngine::new();
    let outcome = engine
        .execute(&pipeline, &candidates, &mut evaluator, scope)
        .map_err(|e| format!("selector error: {}", e))?;

    Ok(match outcome {
        Value::List(items) => items,
        other => vec![other],
    })
}

/// Append to `out` a clone of every `BlockRef` found in `value`, descending
/// into lists (and lists of lists). Other value kinds are ignored.
fn collect_block_refs_from_value(value: &Value, out: &mut Vec<BlockRef>) {
    if let Value::BlockRef(block_ref) = value {
        out.push(block_ref.clone());
    } else if let Value::List(children) = value {
        for child in children {
            collect_block_refs_from_value(child, out);
        }
    }
}

/// Parse `source` with the parent directory of `file` as the root directory
/// (falling back to `.` when `file` has no parent), after applying `lib_args`
/// to the parse options.
///
/// # Errors
///
/// The parse is treated as failed only when the resulting AST has no items
/// *and* the document reports errors. In that case every error is printed to
/// stderr and an error string naming `file` is returned. A document that has
/// errors but a non-empty AST is returned as `Ok`.
pub fn load_parsed(
    file: &Path,
    source: &str,
    lib_args: &LibraryArgs,
) -> Result<crate::Document, String> {
    let root_dir = file.parent().unwrap_or(Path::new(".")).to_path_buf();
    let mut options = crate::ParseOptions {
        root_dir,
        ..Default::default()
    };
    lib_args.apply(&mut options);

    let doc = crate::parse(source, options);
    let fatal = doc.ast.items.is_empty() && doc.has_errors();
    if !fatal {
        return Ok(doc);
    }

    for diagnostic in doc.errors() {
        eprintln!(
            "{}",
            super::format_diagnostic(diagnostic, &doc.source_map, file)
        );
    }
    Err(format!("parse errors in {}", file.display()))
}

/// Walk from `start` through all segments in `path` except the last, returning
/// the block at the penultimate segment. Used by `set` and `remove` before
/// operating on the final path segment (an attribute or final block).
///
/// At each step the first nested block whose kind name equals the segment name
/// is taken; when the segment carries an `#id`, the block's literal inline id
/// must match as well. A path with zero or one segment returns `start`
/// unchanged.
///
/// # Errors
///
/// Returns a message naming the missing segment (including its `#id`, if one
/// was requested) and the block that was being searched.
pub fn walk_path_blocks<'a>(start: &'a Block, path: &AssignmentPath) -> Result<&'a Block, String> {
    // Saturating so an empty path walks nothing instead of underflowing.
    let parent_count = path.segments.len().saturating_sub(1);
    let mut current = start;
    for seg in &path.segments[..parent_count] {
        current = current
            .body
            .iter()
            .find_map(|bi| match bi {
                BodyItem::Block(b)
                    if b.kind.name == seg.name
                        && (seg.id.is_none() || block_inline_id(b) == seg.id.as_deref()) =>
                {
                    Some(b)
                }
                _ => None,
            })
            .ok_or_else(|| {
                // Include the requested id: a block of the right kind may exist
                // with a different id, and naming only the kind would mislead.
                format!(
                    "nested block '{}{}' not found in {}{}",
                    seg.name,
                    seg.id
                        .as_deref()
                        .map(|s| format!("#{}", s))
                        .unwrap_or_default(),
                    current.kind.name,
                    block_inline_id(current)
                        .map(|s| format!("#{}", s))
                        .unwrap_or_default()
                )
            })?;
    }
    Ok(current)
}

/// Returns the leading run of spaces and tabs on the line that contains
/// `byte_offset`.
///
/// `byte_offset` is clamped to the length of `source`. The line starts just
/// after the nearest `\n` before the offset, or at the start of `source` when
/// there is none.
pub fn line_indent(source: &str, byte_offset: usize) -> String {
    let bytes = source.as_bytes();
    let clamped = byte_offset.min(bytes.len());

    // Work on bytes so an offset in the middle of a multi-byte char is harmless.
    let line_start = bytes[..clamped]
        .iter()
        .rposition(|&b| b == b'\n')
        .map_or(0, |newline| newline + 1);
    let indent_len = bytes[line_start..]
        .iter()
        .take_while(|&&b| b == b' ' || b == b'\t')
        .count();

    source[line_start..line_start + indent_len].to_string()
}