jarq 0.8.2

An interactive jq-like JSON query tool with a TUI
Documentation
//! Primitive parsing: integers, string literals, identifiers

use nom::{
    Parser,
    branch::alt,
    bytes::complete::tag,
    character::complete::{char, digit1},
    combinator::{opt, recognize},
};
use nom_locate::LocatedSpan;

use crate::filter::ast::{CompareOp, Span};

/// Input type for all parsers.
///
/// A [`LocatedSpan`] wrapped around the source `&str`, so position
/// information travels with the remaining input as parsers consume it.
pub type Input<'a> = LocatedSpan<&'a str>;

/// Build a [`Span`] from two parser positions.
///
/// The bounds are the `location_offset()` of `start` and of `end`, handed to
/// `Span::new` in that order.
pub fn make_span(start: &Input<'_>, end: &Input<'_>) -> Span {
    let from = start.location_offset();
    let to = end.location_offset();
    Span::new(from, to)
}

/// Parse an optionally negative decimal integer into an `i64`.
///
/// Accepts an optional leading `-` followed by one or more ASCII digits and
/// returns the parsed value together with the remaining input.
///
/// # Errors
///
/// Returns a recoverable `nom::Err::Error` when the input does not start with
/// an integer, or an `ErrorKind::Digit` error when the digits do not fit in an
/// `i64`. The overflow error is anchored at the start of the literal (the
/// same position `parse_number` reports), not at the text following it.
pub fn parse_integer(input: Input<'_>) -> nom::IResult<Input<'_>, i64> {
    // Keep `input` un-shadowed so the overflow error below can point at the
    // beginning of the offending literal.
    let (rest, digits) = recognize(|i| {
        let (i, _) = opt(char('-')).parse(i)?;
        digit1(i)
    })
    .parse(input)?;

    let n: i64 = digits.fragment().parse().map_err(|_| {
        nom::Err::Error(nom::error::Error::new(input, nom::error::ErrorKind::Digit))
    })?;

    Ok((rest, n))
}

use simd_json::StaticNode;

pub fn parse_number(input: Input<'_>) -> nom::IResult<Input<'_>, StaticNode> {
    let (rest, s) = recognize(|i| {
        let (i, _) = opt(char('-')).parse(i)?;
        let (i, _) = digit1(i)?;
        // Optional decimal part
        let (i, has_decimal) = opt(|i| {
            let (i, _) = char('.').parse(i)?;
            digit1(i)
        })
        .parse(i)?;
        // Optional exponent
        let (i, has_exp) = opt(|i| {
            let (i, _) = alt((char('e'), char('E'))).parse(i)?;
            let (i, _) = opt(alt((char('+'), char('-')))).parse(i)?;
            digit1(i)
        })
        .parse(i)?;
        Ok((i, (has_decimal, has_exp)))
    })
    .parse(input)?;

    let s_str = *s.fragment();

    // Determine if this is a float or integer
    let is_float = s_str.contains('.') || s_str.contains('e') || s_str.contains('E');

    if is_float {
        let f: f64 = s_str.parse().map_err(|_| {
            nom::Err::Error(nom::error::Error::new(input, nom::error::ErrorKind::Float))
        })?;
        Ok((rest, StaticNode::F64(f)))
    } else if s_str.starts_with('-') {
        let i: i64 = s_str.parse().map_err(|_| {
            nom::Err::Error(nom::error::Error::new(input, nom::error::ErrorKind::Digit))
        })?;
        Ok((rest, StaticNode::I64(i)))
    } else {
        // Try u64 first, then i64 for positive numbers
        if let Ok(u) = s_str.parse::<u64>() {
            if u <= i64::MAX as u64 {
                Ok((rest, StaticNode::I64(u as i64)))
            } else {
                Ok((rest, StaticNode::U64(u)))
            }
        } else {
            Err(nom::Err::Error(nom::error::Error::new(
                input,
                nom::error::ErrorKind::Digit,
            )))
        }
    }
}

/// Parse a comparison operator token into a [`CompareOp`].
///
/// Recognises `==`, `!=`, `<=`, `>=`, `<` and `>`. The two-character
/// operators are tried before `<` and `>` so `<=` / `>=` are never split.
pub fn parse_compare_op(input: Input<'_>) -> nom::IResult<Input<'_>, CompareOp> {
    let (rest, token): (Input<'_>, Input<'_>) = alt((
        tag("=="),
        tag("!="),
        tag("<="),
        tag(">="),
        tag("<"),
        tag(">"),
    ))
    .parse(input)?;

    let op = match *token.fragment() {
        "==" => CompareOp::Eq,
        "!=" => CompareOp::Ne,
        "<=" => CompareOp::Le,
        ">=" => CompareOp::Ge,
        "<" => CompareOp::Lt,
        ">" => CompareOp::Gt,
        _ => unreachable!("alt only yields one of the operator tokens listed above"),
    };

    Ok((rest, op))
}

pub fn parse_string_literal(input: Input<'_>) -> nom::IResult<Input<'_>, String> {
    let (input, _) = char('"').parse(input)?;

    let mut result = String::new();
    let input_str = *input.fragment();
    let mut chars = input_str.chars();
    let mut consumed = 0;

    loop {
        match chars.next() {
            None => {
                return Err(nom::Err::Error(nom::error::Error::new(
                    input,
                    nom::error::ErrorKind::Char,
                )));
            }
            Some('"') => {
                consumed += 1;
                break;
            }
            Some('\\') => {
                consumed += 1;
                match chars.next() {
                    None => {
                        return Err(nom::Err::Error(nom::error::Error::new(
                            input,
                            nom::error::ErrorKind::Char,
                        )));
                    }
                    Some('n') => {
                        result.push('\n');
                        consumed += 1;
                    }
                    Some('r') => {
                        result.push('\r');
                        consumed += 1;
                    }
                    Some('t') => {
                        result.push('\t');
                        consumed += 1;
                    }
                    Some('\\') => {
                        result.push('\\');
                        consumed += 1;
                    }
                    Some('"') => {
                        result.push('"');
                        consumed += 1;
                    }
                    Some('/') => {
                        result.push('/');
                        consumed += 1;
                    }
                    Some('b') => {
                        result.push('\u{0008}');
                        consumed += 1;
                    }
                    Some('f') => {
                        result.push('\u{000C}');
                        consumed += 1;
                    }
                    Some('u') => {
                        consumed += 1;
                        let mut hex = String::new();
                        for _ in 0..4 {
                            match chars.next() {
                                Some(c) if c.is_ascii_hexdigit() => {
                                    hex.push(c);
                                    consumed += c.len_utf8();
                                }
                                _ => {
                                    return Err(nom::Err::Error(nom::error::Error::new(
                                        input,
                                        nom::error::ErrorKind::HexDigit,
                                    )));
                                }
                            }
                        }
                        let code = u32::from_str_radix(&hex, 16).map_err(|_| {
                            nom::Err::Error(nom::error::Error::new(
                                input,
                                nom::error::ErrorKind::HexDigit,
                            ))
                        })?;
                        let c = char::from_u32(code).ok_or_else(|| {
                            nom::Err::Error(nom::error::Error::new(
                                input,
                                nom::error::ErrorKind::Char,
                            ))
                        })?;
                        result.push(c);
                    }
                    Some(_) => {
                        return Err(nom::Err::Error(nom::error::Error::new(
                            input,
                            nom::error::ErrorKind::Char,
                        )));
                    }
                }
            }
            Some(c) => {
                result.push(c);
                consumed += c.len_utf8();
            }
        }
    }

    // Advance the LocatedSpan by consumed bytes
    let remaining = unsafe {
        Input::new_from_raw_offset(
            input.location_offset() + consumed,
            input.location_line(),
            &input_str[consumed..],
            (),
        )
    };

    Ok((remaining, result))
}

/// Parse an identifier: a letter or `_`, followed by any run of alphanumeric
/// characters or `_`.
///
/// Classification uses `char::is_alphabetic` / `char::is_alphanumeric`.
/// Returns the identifier as an owned `String` together with the remaining
/// input. Fails with a recoverable `ErrorKind::Alpha` error when the input is
/// empty or does not start with a letter or `_`.
pub fn identifier(input: Input<'_>) -> nom::IResult<Input<'_>, String> {
    let text = *input.fragment();

    let starts_ok = text
        .chars()
        .next()
        .map_or(false, |c| c.is_alphabetic() || c == '_');
    if !starts_ok {
        return Err(nom::Err::Error(nom::error::Error::new(
            input,
            nom::error::ErrorKind::Alpha,
        )));
    }

    // The identifier ends at the first character that is neither alphanumeric
    // nor `_`. The already-validated first character passes this test too, so
    // scanning from the start yields the same length.
    let len = text
        .char_indices()
        .find(|&(_, c)| !(c.is_alphanumeric() || c == '_'))
        .map_or(text.len(), |(idx, _)| idx);

    let (name, tail) = text.split_at(len);

    // Advance the LocatedSpan by `len` bytes.
    // SAFETY: `len` is a char-boundary byte count within `text`, `tail` is the
    // remainder of that same buffer, and the offset grows by exactly `len`.
    // Identifier characters never include a newline, so the line is unchanged.
    let remaining = unsafe {
        Input::new_from_raw_offset(
            input.location_offset() + len,
            input.location_line(),
            tail,
            (),
        )
    };

    Ok((remaining, name.to_string()))
}