pg_liquid 0.2.0

A PostgreSQL extension for Liquid template processing.
use pest::iterators::Pair;
use pest::Parser;
use std::collections::HashSet;

/// Parser type whose implementation is generated by `pest_derive` from the
/// `liquid.pest` grammar file named in the `grammar` attribute below.
#[derive(pest_derive::Parser)]
#[grammar = "liquid.pest"]
struct LiquidParser;

/// Parses a Liquid template and returns the distinct base variable names it
/// references, in sorted order.
///
/// The template is parsed with `Rule::LaxLiquidFile`. If parsing fails, no
/// error is surfaced and the returned list is empty.
pub fn extract(template: &str) -> Vec<String> {
    let mut seen = HashSet::new();

    // A parse error is deliberately not reported; it just leaves `seen` empty.
    if let Ok(parsed) = LiquidParser::parse(Rule::LaxLiquidFile, template) {
        parsed.for_each(|node| visit_pairs(node, &mut seen));
    }

    // Names are unique (they come from a set), so an unstable sort yields the
    // same ordering as a stable one.
    let mut names: Vec<String> = Vec::with_capacity(seen.len());
    names.extend(seen);
    names.sort_unstable();
    names
}

/// Walks the parse tree rooted at `pair` and records the base name of every
/// `Rule::Variable` node into `variables`.
///
/// Names for which `is_liquid_keyword` returns `true` are skipped. The walk
/// uses an explicit work list; visiting order is irrelevant because the
/// results are collected into a set.
fn visit_pairs(pair: Pair<Rule>, variables: &mut HashSet<String>) {
    let mut pending = vec![pair];

    while let Some(node) = pending.pop() {
        if node.as_rule() == Rule::Variable {
            // Keep only the base name, and only when it is not a reserved word.
            let candidate =
                extract_base_variable(node.as_str()).filter(|name| !is_liquid_keyword(name));
            if let Some(name) = candidate {
                variables.insert(name);
            }
        }

        // Children are queued whether or not this node was a variable.
        pending.extend(node.into_inner());
    }
}

/// Returns the leading segment of `token` (everything before the first `.` or
/// `[`), e.g. `"user"` for `"user.name"`.
///
/// Yields `None` when that leading segment is not accepted by
/// `is_valid_identifier` (which also rejects an empty segment).
fn extract_base_variable(token: &str) -> Option<String> {
    // Cut at the first accessor delimiter, or keep the whole token if none.
    let cut = token
        .find(|c: char| c == '.' || c == '[')
        .unwrap_or(token.len());
    let base = &token[..cut];

    if is_valid_identifier(base) {
        Some(base.to_owned())
    } else {
        None
    }
}

/// Reports whether `s` has the shape of an identifier: an ASCII letter or
/// underscore followed by any number of ASCII letters, digits, or underscores.
///
/// The empty string is not an identifier.
fn is_valid_identifier(s: &str) -> bool {
    let mut rest = s.chars();

    match rest.next() {
        // Leading character: letter or underscore only (no digits).
        Some(head) if head == '_' || head.is_ascii_alphabetic() => {
            rest.all(|c| c == '_' || c.is_ascii_alphanumeric())
        }
        // Empty input or a disallowed leading character.
        _ => false,
    }
}

/// Reports whether `s` is one of the reserved Liquid words that must not be
/// reported as a template variable.
///
/// The comparison is exact and case-sensitive.
fn is_liquid_keyword(s: &str) -> bool {
    // Reserved words: operators, literals, tag names, and tag arguments.
    const KEYWORDS: &[&str] = &[
        "in",
        "and",
        "or",
        "contains",
        "true",
        "false",
        "nil",
        "null",
        "empty",
        "blank",
        "if",
        "unless",
        "endif",
        "endunless",
        "else",
        "elsif",
        "for",
        "endfor",
        "assign",
        "capture",
        "endcapture",
        "case",
        "when",
        "endcase",
        "break",
        "continue",
        "comment",
        "endcomment",
        "raw",
        "endraw",
        "limit",
        "offset",
        "reversed",
        "cycle",
        "include",
        "render",
        "liquid",
        "echo",
    ];

    KEYWORDS.iter().any(|&keyword| keyword == s)
}