1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
use crate::CompilationError;
use crate::schemas::path;
use crate::step::Node;

use preserves::value::BinarySource;
use preserves::value::BytesBinarySource;
use preserves::value::IOValue;
use preserves::value::NestedValue;
use preserves::value::Reader;

use std::iter::Iterator;

/// Binary operator detected between the pieces of a predicate token list.
///
/// Produced by `split_binop` and consumed by `parse_predicate`.
#[derive(Debug)]
enum Binop {
    /// Pieces were separated by the `+` symbol; becomes `path::Predicate::Or`.
    Union,
    /// Pieces were separated by the `&` symbol; becomes `path::Predicate::And`.
    Intersection,
}

/// Splits `tokens` into the sub-slices that lie between occurrences of the
/// symbol named `separator`.
///
/// A token counts as a separator only when it is a symbol whose text equals
/// `separator`; the separator tokens themselves are not included in any of
/// the returned pieces. The result always contains at least one piece (a
/// single, possibly empty, slice when no separator is present).
fn split_values_by_symbol<'a>(tokens: &'a [IOValue], separator: &str) -> Vec<&'a [IOValue]> {
    let is_separator = |token: &IOValue| {
        token
            .value()
            .as_symbol()
            .map_or(false, |name| name.as_str() == separator)
    };
    tokens.split(is_separator).collect()
}

/// Splits a predicate token list on its binary operator, if it has one.
///
/// The tokens are split once on the `+` symbol and once on the `&` symbol.
///
/// # Returns
/// * no operator present: the single unsplit piece and `None`;
/// * only `+` present: the `+`-separated pieces and `Some(Binop::Union)`;
/// * only `&` present: the `&`-separated pieces and `Some(Binop::Intersection)`.
///
/// # Errors
/// Returns `CompilationError::MixedOperators` when both `+` and `&` occur in
/// `tokens`.
fn split_binop(tokens: &[IOValue]) -> Result<(Vec<&[IOValue]>, Option<Binop>), CompilationError> {
    let by_plus = split_values_by_symbol(tokens, "+");
    let by_ampersand = split_values_by_symbol(tokens, "&");

    // A split that yields exactly one piece means the separator never occurred.
    let has_union = by_plus.len() != 1;
    let has_intersection = by_ampersand.len() != 1;

    match (has_union, has_intersection) {
        (false, false) => Ok((by_plus, None)),
        (true, false) => Ok((by_plus, Some(Binop::Union))),
        (false, true) => Ok((by_ampersand, Some(Binop::Intersection))),
        (true, true) => Err(CompilationError::MixedOperators),
    }
}

/// Parses a whole token list into a `path::Selector`.
///
/// Steps are parsed one after another with `parse_step`, each consuming a
/// prefix of the remaining tokens, until no tokens are left. An empty token
/// list yields a selector with no steps.
///
/// # Errors
/// Propagates any `CompilationError` reported by `parse_step`.
pub fn parse_selector(tokens: &[IOValue]) -> Result<path::Selector, CompilationError> {
    let mut rest = tokens;
    let mut steps = Vec::new();
    loop {
        match parse_step(rest)? {
            Some((step, tail)) => {
                steps.push(step);
                rest = tail;
            }
            None => return Ok(path::Selector(steps)),
        }
    }
}

pub fn parse_predicate(tokens: &[IOValue]) -> Result<path::Predicate, CompilationError> {
    let (pieces, binop) = split_binop(tokens)?;
    match binop {
        None => parse_non_binop(&pieces[0]),
        Some(o) => {
            let preds = pieces.into_iter().map(|ts| parse_non_binop(&ts)).collect::<Result<_,_>>()?;
            Ok(match o {
                Binop::Union => path::Predicate::Or { preds },
                Binop::Intersection => path::Predicate::And { preds },
            })
        }
    }
}

/// Parses a predicate piece that contains no binary operator.
///
/// Every leading `!` symbol adds one layer of `Predicate::Not`; the tokens
/// after the last leading `!` are parsed as a selector and wrapped in
/// `Predicate::Selector`, which forms the innermost predicate.
///
/// # Errors
/// Propagates any `CompilationError` from `parse_selector`.
fn parse_non_binop(tokens: &[IOValue]) -> Result<path::Predicate, CompilationError> {
    // Count the run of `!` symbols at the front of the token list.
    let negations = tokens
        .iter()
        .take_while(|token| {
            matches!(token.value().as_symbol(), Some(name) if name.as_str() == "!")
        })
        .count();

    let selector = parse_selector(&tokens[negations..])?;
    let mut predicate = path::Predicate::Selector(Box::new(selector));

    // Wrap from the inside out, one `Not` per leading `!`.
    for _ in 0..negations {
        predicate = path::Predicate::Not { pred: Box::new(predicate) };
    }
    Ok(predicate)
}

fn parse_step(tokens: &[IOValue]) -> Result<Option<(path::Step, &[IOValue])>, CompilationError> {
    if tokens.is_empty() {
        return Ok(None);
    }

    let remainder = &tokens[1..];

    if tokens[0].value().is_sequence() {
        return Ok(Some((path::Step::Filter(Box::new(path::Filter::Test {
            pred: Box::new(parse_predicate(tokens[0].value().as_sequence().unwrap())?),
        })), remainder)));
    }

    match tokens[0].value().as_symbol() {
        None => return Err(CompilationError::InvalidStep),
        Some(t) => match t.as_str() {
            "/" => Ok(Some((path::Step::Axis(Box::new(path::Axis::Values)), remainder))),
            "//" => Ok(Some((path::Step::Axis(Box::new(path::Axis::Descendants)), remainder))),
            "." => {
                let (key, remainder) = pop_step_arg(remainder)?;
                Ok(Some((path::Step::Axis(Box::new(path::Axis::At { key })), remainder)))
            }
            ".^" => Ok(Some((path::Step::Axis(Box::new(path::Axis::Label)), remainder))),
            ".keys" => Ok(Some((path::Step::Axis(Box::new(path::Axis::Keys)), remainder))),
            ".length" => Ok(Some((path::Step::Axis(Box::new(path::Axis::Length)), remainder))),
            ".annotations" => Ok(Some((path::Step::Axis(Box::new(path::Axis::Annotations)), remainder))),
            ".embedded" => Ok(Some((path::Step::Axis(Box::new(path::Axis::Embedded)), remainder))),

            "*" => Ok(Some((path::Step::Filter(Box::new(path::Filter::Nop)), remainder))),
            "eq" | "=" => parse_comparison(remainder, path::Comparison::Eq),
            "ne" | "!=" => parse_comparison(remainder, path::Comparison::Ne),
            "lt" => parse_comparison(remainder, path::Comparison::Lt),
            "gt" => parse_comparison(remainder, path::Comparison::Gt),
            "le" => parse_comparison(remainder, path::Comparison::Le),
            "ge" => parse_comparison(remainder, path::Comparison::Ge),
            "re" | "=r" => {
                let (regex_val, remainder) = pop_step_arg(remainder)?;
                let regex = regex_val.value().to_string().map_err(|_| CompilationError::InvalidStep)?.clone();
                let _ = regex::Regex::new(&regex)?;
                Ok(Some((path::Step::Filter(Box::new(path::Filter::Regex { regex })), remainder)))
            }
            "^" => {
                let (literal, remainder) = pop_step_arg(remainder)?;
                Ok(Some((path::Step::Filter(Box::new(path::Filter::Test {
                    pred: Box::new(path::Predicate::Selector(Box::new(path::Selector(vec![
                        path::Step::Axis(Box::new(path::Axis::Label)),
                        path::Step::Filter(Box::new(path::Filter::Compare {
                            op: Box::new(path::Comparison::Eq),
                            literal,
                        })),
                    ])))),
                })), remainder)))
            }

            "~real" => Ok(Some((path::Step::Filter(Box::new(path::Filter::Real)), remainder))),
            "~int" => Ok(Some((path::Step::Filter(Box::new(path::Filter::Int)), remainder))),

            "bool" => Ok(Some((path::Step::from(path::ValueKind::Boolean), remainder))),
            "float" => Ok(Some((path::Step::from(path::ValueKind::Float), remainder))),
            "double" => Ok(Some((path::Step::from(path::ValueKind::Double), remainder))),
            "int" => Ok(Some((path::Step::from(path::ValueKind::SignedInteger), remainder))),
            "string" => Ok(Some((path::Step::from(path::ValueKind::String), remainder))),
            "bytes" => Ok(Some((path::Step::from(path::ValueKind::ByteString), remainder))),
            "symbol" => Ok(Some((path::Step::from(path::ValueKind::Symbol), remainder))),
            "rec" => Ok(Some((path::Step::from(path::ValueKind::Record), remainder))),
            "seq" => Ok(Some((path::Step::from(path::ValueKind::Sequence), remainder))),
            "set" => Ok(Some((path::Step::from(path::ValueKind::Set), remainder))),
            "dict" => Ok(Some((path::Step::from(path::ValueKind::Dictionary), remainder))),
            "embedded" => Ok(Some((path::Step::from(path::ValueKind::Embedded), remainder))),

            _ => Err(CompilationError::InvalidStep),
        }
    }
}

/// Lifts a `path::ValueKind` into the filter step that tests for that kind.
impl From<path::ValueKind> for path::Step {
    /// Wraps `k` as `Step::Filter(Filter::Kind { kind })`, boxing each layer.
    fn from(k: path::ValueKind) -> Self {
        let kind = Box::new(k);
        let filter = path::Filter::Kind { kind };
        path::Step::Filter(Box::new(filter))
    }
}

/// Takes the argument token of a step from the front of `tokens`.
///
/// # Returns
/// A clone of the first token together with the slice of remaining tokens.
///
/// # Errors
/// Returns `CompilationError::InvalidStep` when `tokens` is empty, i.e. the
/// step's argument is missing.
fn pop_step_arg(tokens: &[IOValue]) -> Result<(IOValue, &[IOValue]), CompilationError> {
    match tokens.split_first() {
        Some((argument, rest)) => Ok((argument.clone(), rest)),
        None => Err(CompilationError::InvalidStep),
    }
}

/// Builds a comparison filter step whose literal is the next token.
///
/// # Parameters
/// * `tokens` - the tokens following the comparison keyword; the first one
///   is consumed as the literal to compare against.
/// * `op` - the comparison operator to apply.
///
/// # Returns
/// `Ok(Some((step, rest)))`, where `step` is a `Filter::Compare` step and
/// `rest` is what remains of `tokens` after the literal.
///
/// # Errors
/// Returns `CompilationError::InvalidStep` when no literal token is present.
fn parse_comparison(
    tokens: &[IOValue],
    op: path::Comparison,
) -> Result<Option<(path::Step, &[IOValue])>, CompilationError> {
    pop_step_arg(tokens).map(|(literal, remainder)| {
        let compare = path::Filter::Compare {
            op: Box::new(op),
            literal,
        };
        Some((path::Step::Filter(Box::new(compare)), remainder))
    })
}

/// Parses a selector from its textual form.
impl std::str::FromStr for path::Selector {
    type Err = CompilationError;

    /// Reads every value from the text of `s` (with the reader configured
    /// with `false`) and hands the resulting tokens to `parse_selector`.
    ///
    /// # Errors
    /// Fails if the text cannot be read into values, or if the tokens do not
    /// form a valid selector.
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        let mut source = BytesBinarySource::new(s.as_bytes());
        let tokens = source
            .text_iovalues()
            .configured(false)
            .collect::<Result<Vec<_>, _>>()?;
        parse_selector(&tokens)
    }
}

/// Parses a selector from text and compiles it into a `Node`.
impl std::str::FromStr for Node {
    type Err = CompilationError;

    /// Parses `s` as a `path::Selector` and returns the result of calling
    /// `compile` on it.
    ///
    /// # Errors
    /// Propagates the parse error if `s` is not a valid selector; otherwise
    /// returns whatever `compile` returns.
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        s.parse::<path::Selector>()?.compile()
    }
}