Skip to main content

shape_ast/parser/expressions/
data_refs.rs

1//! Data reference expression parsing
2//!
3//! This module handles parsing of generic data/DataFrame references:
4//! - Simple data references: data[0], data[-1]
5//! - Timeframe-specific references: data(5m)[0]
6//! - DateTime-based references: data[@2024-01-01]
7//! - Relative access: data[@today][-1]
8//! - Index parsing and range expressions
9
10use super::super::pair_span;
11use crate::ast::{DataDateTimeRef, DataIndex, DataRef, Expr, Literal, Timeframe, UnaryOp};
12use crate::error::{Result, ShapeError};
13use crate::parser::Rule;
14use pest::iterators::Pair;
15
16/// Parse a data reference
17pub fn parse_data_ref(pair: Pair<Rule>) -> Result<Expr> {
18    let span = pair_span(&pair);
19    let mut inner = pair.into_inner();
20    let mut timeframe: Option<Timeframe> = None;
21
22    // Check if the first item is a timeframe specification
23    if let Some(first) = inner.peek() {
24        if first.as_rule() == Rule::timeframe_spec {
25            let timeframe_spec = inner.next().unwrap();
26            let timeframe_inner = timeframe_spec.into_inner().next().unwrap();
27
28            // Parse the timeframe
29            match timeframe_inner.as_rule() {
30                Rule::timeframe => {
31                    timeframe = Timeframe::parse(timeframe_inner.as_str());
32                    if timeframe.is_none() {
33                        return Err(ShapeError::ParseError {
34                            message: format!("Invalid timeframe: {}", timeframe_inner.as_str()),
35                            location: None,
36                        });
37                    }
38                }
39                Rule::expression => {
40                    // Dynamic timeframe expressions require runtime evaluation
41                    // The grammar allows them, but the AST DataRef only supports static timeframes
42                    // This would require a DataRef variant with Box<Expr> for dynamic timeframes
43                    return Err(ShapeError::ParseError {
44                        message: "Dynamic timeframe expressions in data references require runtime evaluation. Use a static timeframe like data(5m)[0] instead.".to_string(),
45                        location: None,
46                    });
47                }
48                _ => {}
49            }
50        }
51    }
52
53    // Parse the access part (required - grammar enforces this)
54    let access = inner.next().ok_or_else(|| ShapeError::ParseError {
55        message: "data reference requires brackets: data[0], data[-1], data[@datetime]".to_string(),
56        location: None,
57    })?;
58
59    match access.as_rule() {
60        Rule::datetime_access => {
61            // Parse datetime-based access
62            let mut datetime_inner = access.into_inner();
63            let datetime_expr_pair = datetime_inner.next().unwrap();
64            let (start_expr, end_expr) = super::temporal::parse_datetime_range(datetime_expr_pair)?;
65            if end_expr.is_some() {
66                return Err(ShapeError::ParseError {
67                    message: "Datetime ranges are not supported in data access".to_string(),
68                    location: None,
69                });
70            }
71            let datetime_expr = match start_expr {
72                Expr::DateTime(expr, _) => expr,
73                _ => {
74                    return Err(ShapeError::ParseError {
75                        message: "Expected datetime expression in data access".to_string(),
76                        location: None,
77                    });
78                }
79            };
80
81            // Check for optional timeframe parameter
82            let mut datetime_timeframe: Option<Timeframe> = None;
83            let next_item = datetime_inner.peek();
84
85            if let Some(item) = next_item {
86                match item.as_rule() {
87                    Rule::timeframe => {
88                        let tf_pair = datetime_inner.next().unwrap();
89                        datetime_timeframe = Timeframe::parse(tf_pair.as_str());
90                        if datetime_timeframe.is_none() {
91                            return Err(ShapeError::ParseError {
92                                message: format!("Invalid timeframe: {}", tf_pair.as_str()),
93                                location: None,
94                            });
95                        }
96                    }
97                    Rule::expression => {
98                        // Dynamic timeframe expressions require runtime evaluation
99                        return Err(ShapeError::ParseError {
100                                message: "Dynamic timeframe expressions in data references require runtime evaluation. Use a static timeframe instead."
101                                    .to_string(),
102                                location: None,
103                            });
104                    }
105                    Rule::index_access => {
106                        // This is an index access, not a timeframe
107                    }
108                    _ => {}
109                }
110            }
111
112            // Use the timeframe from datetime_access if present, otherwise use the one from timeframe_spec
113            let final_timeframe = datetime_timeframe.or(timeframe);
114
115            // Check if there's a subsequent index access
116            if let Some(index_access) = datetime_inner.next() {
117                if index_access.as_rule() == Rule::index_access {
118                    // This is a relative access from a datetime reference
119                    // Timeframe is already captured in the DataDateTimeRef
120                    let (index, _) = parse_index_expr(index_access.into_inner().next().unwrap())?;
121                    Ok(Expr::DataRelativeAccess {
122                        reference: Box::new(Expr::DataDateTimeRef(
123                            DataDateTimeRef {
124                                datetime: datetime_expr,
125                                timezone: None,
126                                timeframe: final_timeframe,
127                            },
128                            span,
129                        )),
130                        index,
131                        span,
132                    })
133                } else {
134                    // Just a datetime reference
135                    Ok(Expr::DataDateTimeRef(
136                        DataDateTimeRef {
137                            datetime: datetime_expr,
138                            timezone: None,
139                            timeframe: final_timeframe,
140                        },
141                        span,
142                    ))
143                }
144            } else {
145                // Just a datetime reference
146                Ok(Expr::DataDateTimeRef(
147                    DataDateTimeRef {
148                        datetime: datetime_expr,
149                        timezone: None,
150                        timeframe: final_timeframe,
151                    },
152                    span,
153                ))
154            }
155        }
156        Rule::index_access => {
157            // Traditional integer-based access
158            let index_expr = access.into_inner().next().unwrap();
159            let (index, index_timeframe) = parse_index_expr(index_expr)?;
160            // Use the timeframe from index_expr if present, otherwise use the one from timeframe_spec
161            let final_timeframe = index_timeframe.or(timeframe);
162            Ok(Expr::DataRef(
163                DataRef {
164                    index,
165                    timeframe: final_timeframe,
166                },
167                span,
168            ))
169        }
170        _ => Err(ShapeError::ParseError {
171            message: format!("Unexpected data access type: {:?}", access.as_rule()),
172            location: None,
173        }),
174    }
175}
176
177/// Parse index expression (with optional timeframe)
178pub fn parse_index_expr(pair: Pair<Rule>) -> Result<(DataIndex, Option<Timeframe>)> {
179    // This parses index_expr which can be:
180    // - expression (single index)
181    // - expression:expression (range)
182    // - expression, timeframe (single index with timeframe)
183    // - expression:expression, timeframe (range with timeframe)
184
185    let span = pair_span(&pair);
186    let mut inner = pair.into_inner();
187    let first_expr = inner.next().unwrap();
188
189    // First, try to parse as an integer for optimization
190    let index = if first_expr.as_rule() == Rule::integer {
191        let first_val: i32 = first_expr
192            .as_str()
193            .parse()
194            .map_err(|e| ShapeError::ParseError {
195                message: format!("Invalid integer: {}", e),
196                location: None,
197            })?;
198
199        // Check if there's a colon (range indicator)
200        let mut has_range = false;
201        let mut range_end = None;
202
203        if let Some(next) = inner.peek() {
204            if next.as_rule() == Rule::expression {
205                // Could be a range
206                has_range = true;
207                let second_expr = inner.next().unwrap();
208                if second_expr.as_rule() == Rule::integer {
209                    let second_val: i32 =
210                        second_expr
211                            .as_str()
212                            .parse()
213                            .map_err(|e| ShapeError::ParseError {
214                                message: format!("Invalid integer: {}", e),
215                                location: None,
216                            })?;
217                    range_end = Some(second_val);
218                } else {
219                    // Expression range
220                    let expr = super::parse_expression(second_expr)?;
221                    return Ok((
222                        DataIndex::ExpressionRange(
223                            Box::new(Expr::Literal(Literal::Number(first_val as f64), span)),
224                            Box::new(expr),
225                        ),
226                        parse_optional_timeframe(&mut inner)?,
227                    ));
228                }
229            }
230        }
231
232        if has_range && range_end.is_some() {
233            DataIndex::Range(first_val, range_end.unwrap())
234        } else {
235            DataIndex::Single(first_val)
236        }
237    } else {
238        // Parse as expression
239        let expr = super::parse_expression(first_expr)?;
240        if let Expr::Range {
241            ref start, ref end, ..
242        } = expr
243        {
244            // Range expression inside index, treat as data range.
245            // Both start and end must be present for data ranges
246            if let (Some(start_expr), Some(end_expr)) = (start, end) {
247                if let (Some(start_const), Some(end_const)) = (
248                    try_evaluate_constant_index(start_expr),
249                    try_evaluate_constant_index(end_expr),
250                ) {
251                    let timeframe = parse_optional_timeframe(&mut inner)?;
252                    return Ok((DataIndex::Range(start_const, end_const), timeframe));
253                }
254
255                let timeframe = parse_optional_timeframe(&mut inner)?;
256                return Ok((
257                    DataIndex::ExpressionRange(start_expr.clone(), end_expr.clone()),
258                    timeframe,
259                ));
260            }
261        }
262
263        // Check if it's a constant
264        if let Some(const_val) = try_evaluate_constant_index(&expr) {
265            // Check for range
266            if let Some(next) = inner.peek() {
267                if next.as_rule() == Rule::expression {
268                    let second_expr = super::parse_expression(inner.next().unwrap())?;
269                    if let Some(second_const) = try_evaluate_constant_index(&second_expr) {
270                        DataIndex::Range(const_val, second_const)
271                    } else {
272                        DataIndex::ExpressionRange(
273                            Box::new(Expr::Literal(Literal::Number(const_val as f64), span)),
274                            Box::new(second_expr),
275                        )
276                    }
277                } else {
278                    DataIndex::Single(const_val)
279                }
280            } else {
281                DataIndex::Single(const_val)
282            }
283        } else {
284            // Dynamic expression
285            if let Some(next) = inner.peek() {
286                if next.as_rule() == Rule::expression {
287                    let second_expr = super::parse_expression(inner.next().unwrap())?;
288                    DataIndex::ExpressionRange(Box::new(expr), Box::new(second_expr))
289                } else {
290                    DataIndex::Expression(Box::new(expr))
291                }
292            } else {
293                DataIndex::Expression(Box::new(expr))
294            }
295        }
296    };
297
298    // Now parse the optional timeframe
299    let timeframe = parse_optional_timeframe(&mut inner)?;
300
301    Ok((index, timeframe))
302}
303
304/// Parse optional timeframe
305pub fn parse_optional_timeframe(
306    inner: &mut pest::iterators::Pairs<Rule>,
307) -> Result<Option<Timeframe>> {
308    if let Some(next) = inner.next() {
309        match next.as_rule() {
310            Rule::timeframe => {
311                let tf = Timeframe::parse(next.as_str());
312                if tf.is_none() {
313                    return Err(ShapeError::ParseError {
314                        message: format!("Invalid timeframe: {}", next.as_str()),
315                        location: None,
316                    });
317                }
318                Ok(tf)
319            }
320            Rule::expression => {
321                // Dynamic timeframe expression - not supported yet
322                Err(ShapeError::ParseError {
323                    message: "Dynamic timeframe expressions not yet supported".to_string(),
324                    location: None,
325                })
326            }
327            _ => Err(ShapeError::ParseError {
328                message: format!("Expected timeframe or expression, got {:?}", next.as_rule()),
329                location: None,
330            }),
331        }
332    } else {
333        Ok(None)
334    }
335}
336
337/// Try to evaluate an expression as a constant integer at parse time
338fn try_evaluate_constant_index(expr: &Expr) -> Option<i32> {
339    match expr {
340        Expr::Literal(Literal::Number(n), _) => Some(*n as i32),
341        Expr::UnaryOp {
342            op: UnaryOp::Neg,
343            operand,
344            ..
345        } => {
346            if let Expr::Literal(Literal::Number(n), _) = operand.as_ref() {
347                Some(-(*n as i32))
348            } else {
349                None
350            }
351        }
352        _ => None,
353    }
354}
355
356/// Parse a general index expression (can be any expression, not just integers)
357pub fn parse_index_expr_general(pair: Pair<Rule>) -> Result<(Expr, Option<Expr>)> {
358    // The pair here is the full index_expr, we need to look at its contents
359    match pair.as_rule() {
360        Rule::index_expr => {
361            // Handle the actual parsing of index_expr contents
362            let mut inner = pair.into_inner();
363            let first_pair = inner.next().unwrap();
364
365            // Check if first element is a datetime_range
366            let (first_expr, mut second_expr) = match first_pair.as_rule() {
367                Rule::datetime_range => {
368                    // Parse datetime range directly
369                    super::temporal::parse_datetime_range(first_pair)?
370                }
371                _ => {
372                    // Parse as regular expression
373                    let expr = super::parse_expression(first_pair)?;
374                    (expr, None)
375                }
376            };
377
378            // Check if there's a colon and second part (range)
379            if let Some(next_pair) = inner.next() {
380                // This should be the second part of the range
381                match next_pair.as_rule() {
382                    Rule::datetime_range => {
383                        let (range_end, _) = super::temporal::parse_datetime_range(next_pair)?;
384                        second_expr = Some(range_end);
385                    }
386                    Rule::expression => {
387                        second_expr = Some(super::parse_expression(next_pair)?);
388                    }
389                    _ => {
390                        // Skip timeframe or other tokens
391                    }
392                }
393            }
394
395            if second_expr.is_none() {
396                if let Expr::Range {
397                    ref start, ref end, ..
398                } = first_expr
399                {
400                    // For ranges with both start and end, extract them
401                    if let (Some(s), Some(e)) = (start, end) {
402                        return Ok((*s.clone(), Some(*e.clone())));
403                    }
404                }
405            }
406
407            Ok((first_expr, second_expr))
408        }
409        _ => {
410            // Fallback for when called with other rules
411            let mut inner = pair.into_inner();
412            let first = super::parse_expression(inner.next().unwrap())?;
413
414            // Check if there's a second expression (range)
415            if let Some(second) = inner.next() {
416                let end = super::parse_expression(second)?;
417                Ok((first, Some(end)))
418            } else {
419                Ok((first, None))
420            }
421        }
422    }
423}