json_parser_with_pest/
parser.rs

1use anyhow::{Error, Result};
2use log::{error, info};
3use pest::Parser;
4use pest_derive::Parser;
5use serde_json::{Map, Value};
6use std::fs;
7use std::path::Path;
8use std::str::FromStr;
9use thiserror::Error;
10
11
12/// JSONParser struct, generated from the grammar defined in `json.pest`.
13/// This struct is used to parse JSON based on the defined rules in the `json.pest` grammar file.
14///
15/// # Grammar Documentation
16///
17/// ## WHITESPACE
18/// Matches spaces, tabs, and newlines. Used to ignore non-significant whitespace.
19///
20/// - Example:
21///   - Input: `"  \n\t"`
22///   - Match: `WHITESPACE`
23///
24/// ## json
25/// Entry point for JSON. Matches the entire JSON document.
26///
27/// - Example:
28///   - Input: `{ "key": "value" }`
29///   - Match: Valid JSON structure.
30///
31/// ## value
32/// Matches any valid JSON value, including:
33/// - Objects: `{ "key": "value" }`
34/// - Arrays: `[1, 2, 3]`
35/// - Strings: `"string"`
36/// - Numbers: `123.45`
37/// - Booleans: `true`, `false`
38/// - Null: `null`
39///
40/// ## object
41/// Matches a JSON object, which contains key-value pairs.
42///
43/// - Example:
44///   - Input: `{ "key": "value", "key2": "value2" }`
45///   - Match: Valid JSON object.
46///
47/// ## array
48/// Matches a JSON array, which contains a list of values.
49///
50/// - Example:
51///   - Input: `[1, "two", null, false]`
52///   - Match: Valid JSON array.
53///
54/// ## string
55/// Matches a JSON string, supporting escape sequences and Unicode.
56///
57/// - Example:
58///   - Input: `"Hello World"`
59///   - Match: Valid JSON string.
60///
61/// ## number
62/// Matches a JSON number, including integers and floating-point numbers.
63///
64/// - Example:
65///   - Input: `123`, `-0.45`, `1e10`
66///   - Match: Valid JSON number.
67///
68/// ## boolean
69/// Matches `true` or `false`.
70///
71/// - Example:
72///   - Input: `true`
73///   - Match: Boolean true value.
74///
75/// ## null
76/// Matches the literal `null`.
77///
78/// - Example:
79///   - Input: `null`
80///   - Match: JSON null value.
81///
82/// ## date
83/// Matches dates in the format `YYYY-MM-DD`.
84///
85/// - Example:
86///   - Input: `2023-11-15`
87///   - Match: Valid date string.
88///
89/// ## identifier
90/// Matches a valid identifier starting with a letter, followed by letters, digits, or `_`.
91///
92/// - Example:
93///   - Input: `key_name`
94///   - Match: Valid identifier.
95///
96/// ## version
97/// Matches a semantic version number in `SemVer` format.
98///
99/// - Example:
100///   - Input: `1.0.0`, `2.1.3-alpha`
101///   - Match: Valid version string.
102///
103/// ## key_value_array
104/// Matches an array of objects where each object is a key-value pair.
105///
106/// - Example:
107///   - Input: `[ { "key1": "value1" }, { "key2": "value2" } ]`
108///   - Match: Valid JSON key-value array.
109
110
// The derive reads `json.pest` and generates the `Rule` enum matched on by the
// parsing helpers in this file, together with the `JSONParser::parse` entry point
// that `parse_json` calls.
#[derive(Parser)]
#[grammar = "json.pest"]
pub struct JSONParser;
114
/// Custom error type for JSON parsing, schema validation, and file-related errors.
///
/// The parse and validation variants carry no payload, so callers can only
/// distinguish the category of failure, not its location or cause.
#[derive(Error, Debug)]
pub enum ParserError {
    /// An I/O operation failed. Wraps the underlying `std::io::Error`; the `#[from]`
    /// attribute lets `?` convert an `std::io::Error` into this variant.
    #[error("File read error: {0}")]
    FileReadError(#[from] std::io::Error),
    /// The input did not match the grammar, or a parsed token could not be converted
    /// into a `serde_json::Value`.
    #[error("JSON parse error")]
    JsonParseError,
    /// Returned by `validate_json_schema` when the JSON value does not satisfy the schema.
    #[error("Schema validation failed")]
    SchemaValidationError,
}
125
126/// Parses a JSON string using the `JSONParser` and converts it to a `serde_json::Value`.
127/// Returns a `Result` with `Value` on success or `ParserError` on failure.
128///
129/// # Arguments
130///
131/// * `json_str` - The JSON string to be parsed.
132///
133/// # Returns
134///
135/// * `Result<Value, ParserError>` - The parsed JSON as a `serde_json::Value` if successful, or an error on failure.
136pub fn parse_json(json_str: &str) -> Result<Value, ParserError> {
137    let pairs = JSONParser::parse(Rule::json, json_str).map_err(|e| {
138        println!("Parsing error in JSON input: {:?}", e);
139        ParserError::JsonParseError
140    })?;
141    parse_value(pairs)
142}
143
144/// Recursively processes `pest` parsing results and converts them to `serde_json::Value`.
145///
146/// # Arguments
147///
148/// * `pairs` - The parsed pairs of tokens from `pest`.
149///
150/// # Returns
151///
152/// * `Result<Value, ParserError>` - A `serde_json::Value` representing the parsed JSON structure, or an error if parsing fails.
153fn parse_value(mut pairs: pest::iterators::Pairs<Rule>) -> Result<Value, ParserError> {
154    let pair = pairs.next().ok_or_else(|| {
155        println!("No pairs found in input.");
156        ParserError::JsonParseError
157    })?;
158
159    match pair.as_rule() {
160        Rule::json => parse_value(pair.into_inner()),
161        Rule::object => parse_object(pair),
162        Rule::array => parse_array(pair),
163        Rule::string => Ok(Value::String(parse_string(pair)?)),
164        Rule::number => Ok(Value::Number(parse_number(pair)?)),
165        Rule::boolean => Ok(Value::Bool(pair.as_str() == "true")),
166        Rule::null => Ok(Value::Null),
167        _ => {
168            println!("Unexpected pair encountered: {:?}", pair.as_rule());
169            Err(ParserError::JsonParseError)
170        }
171    }
172}
173
174/// Parses a JSON object and returns it as a `serde_json::Value`.
175/// Handles JSON objects by parsing each key-value pair.
176///
177/// # Arguments
178///
179/// * `pair` - The `pest::iterators::Pair` containing the JSON object.
180///
181/// # Returns
182///
183/// * `Result<Value, ParserError>` - Returns a `serde_json::Value::Object` on success, or an error if parsing fails.
184fn parse_object(pair: pest::iterators::Pair<Rule>) -> Result<Value, ParserError> {
185    let mut map = Map::new();
186    for inner_pair in pair.into_inner() {
187        if inner_pair.as_rule() == Rule::pair {
188            let mut inner_rules = inner_pair.into_inner();
189            let key = parse_string(inner_rules.next().ok_or(ParserError::JsonParseError)?)?;
190            let value = parse_value(inner_rules)?;
191            map.insert(key, value);
192        }
193    }
194    Ok(Value::Object(map))
195}
196
197/// Parses a JSON array and returns it as a `serde_json::Value`.
198/// Processes each array element and collects them into a `Vec<Value>`.
199///
200/// # Arguments
201///
202/// * `pair` - The `pest::iterators::Pair` containing the JSON array.
203///
204/// # Returns
205///
206/// * `Result<Value, ParserError>` - Returns a `serde_json::Value::Array` on success, or an error if parsing fails.
207fn parse_array(pair: pest::iterators::Pair<Rule>) -> Result<Value, ParserError> {
208    let mut array = Vec::new();
209    for inner_pair in pair.into_inner() {
210        let value = parse_value(inner_pair.into_inner())?;
211        array.push(value);
212    }
213    Ok(Value::Array(array))
214}
215
216/// Parses a JSON string, handling escape sequences and Unicode characters.
217///
218/// # Arguments
219///
220/// * `pair` - The `pest::iterators::Pair` containing the JSON string.
221///
222/// # Returns
223///
224/// * `Result<String, ParserError>` - The parsed string or an error if parsing fails.
225fn parse_string(pair: pest::iterators::Pair<Rule>) -> Result<String, ParserError> {
226    let mut result = String::new();
227    for inner_pair in pair.into_inner() {
228        match inner_pair.as_rule() {
229            Rule::character => result.push_str(inner_pair.as_str()),
230            Rule::escape_sequence => {
231                let escaped = match inner_pair.as_str() {
232                    "\\\"" => "\"",
233                    "\\\\" => "\\",
234                    "\\/" => "/",
235                    "\\b" => "\u{0008}",
236                    "\\f" => "\u{000C}",
237                    "\\n" => "\n",
238                    "\\r" => "\r",
239                    "\\t" => "\t",
240                    escape if escape.starts_with("\\u") => {
241                        let hex = &escape[2..];
242                        let code_point = u32::from_str_radix(hex, 16)
243                            .map_err(|_| ParserError::JsonParseError)?;
244                        let unicode_char = std::char::from_u32(code_point)
245                            .ok_or(ParserError::JsonParseError)?
246                            .to_string();
247                        result.push_str(&unicode_char);
248                        continue;
249                    }
250                    _ => return Err(ParserError::JsonParseError),
251                };
252                result.push_str(escaped);
253            }
254            _ => {}
255        }
256    }
257    Ok(result)
258}
259
260/// Parses a JSON number and converts it to a `serde_json::Number`.
261///
262/// # Arguments
263///
264/// * `pair` - The `pest::iterators::Pair` containing the JSON number.
265///
266/// # Returns
267///
268/// * `Result<serde_json::Number, ParserError>` - The parsed number or an error if parsing fails.
269fn parse_number(pair: pest::iterators::Pair<Rule>) -> Result<serde_json::Number, ParserError> {
270    let number_str = pair.as_str();
271    serde_json::Number::from_str(number_str).map_err(|_| ParserError::JsonParseError)
272}
273
274/// Validates a JSON object against a schema.
275/// Checks that all keys in the schema are present in the JSON object.
276///
277/// # Arguments
278///
279/// * `json` - The JSON object to validate.
280/// * `schema` - The schema to validate against.
281///
282/// # Returns
283///
284/// * `Result<(), ParserError>` - Returns Ok if validation is successful, or an error if validation fails.
285pub fn validate_json_schema(json: &Value, schema: &Value) -> Result<(), ParserError> {
286    if json.is_object() && schema.is_object() {
287        if json
288            .as_object()
289            .unwrap()
290            .keys()
291            .all(|key| schema.as_object().unwrap().contains_key(key))
292        {
293            Ok(())
294        } else {
295            Err(ParserError::SchemaValidationError)
296        }
297    } else {
298        Err(ParserError::SchemaValidationError)
299    }
300}
301
302/// Parses a specific part of the JSON file by a given key.
303/// Returns `Some(Value)` if the key exists, otherwise `None`.
304///
305/// # Arguments
306///
307/// * `json` - The JSON object to parse.
308/// * `key` - The key to extract from the JSON.
309///
310/// # Returns
311///
312/// * `Option<Value>` - The extracted value or `None` if the key is not found.
313pub fn parse_partial_json(json: &Value, key: &str) -> Option<Value> {
314    json.get(key).cloned()
315}
316
317/// Edits a JSON file by updating a specific key with a new value.
318///
319/// # Arguments
320///
321/// * `json` - A mutable reference to the JSON object to edit.
322/// * `key` - The key in the JSON object to update.
323/// * `new_value` - The new value to set for the specified key.
324///
325/// # Returns
326///
327/// * `Result<(), Error>` - Returns Ok if successful, or an error if the JSON structure is invalid.
328pub fn edit_json(json: &mut Value, key: &str, new_value: Value) -> Result<(), Error> {
329    if let Some(obj) = json.as_object_mut() {
330        obj.insert(key.to_string(), new_value);
331        Ok(())
332    } else {
333        Err(Error::msg("Invalid JSON structure for editing"))
334    }
335}
336
337/// Converts JSON to YAML or XML format based on the specified format.
338///
339/// # Arguments
340///
341/// * `json` - The JSON object to convert.
342/// * `format` - The target format ("yaml" or "xml").
343///
344/// # Returns
345///
346/// * `Result<String, Error>` - The converted JSON in the specified format, or an error if the format is unsupported.
347pub fn convert_to_format(json: &Value, format: &str) -> Result<String, Error> {
348    match format {
349        "yaml" => serde_yaml::to_string(json).map_err(|e| Error::msg(e.to_string())),
350        "xml" => convert_json_to_xml(json),
351        _ => Err(Error::msg("Unsupported format")),
352    }
353}
354
355/// Converts JSON to XML format.
356///
357/// # Arguments
358///
359/// * `json` - The JSON object to convert.
360///
361/// # Returns
362///
363/// * `Result<String, Error>` - The converted JSON in XML format, or an error if conversion fails.
364fn convert_json_to_xml(json: &Value) -> Result<String, Error> {
365    let mut writer = Vec::new();
366    write_xml(json, &mut writer, "root")?;
367    String::from_utf8(writer).map_err(|e| Error::msg(e.to_string()))
368}
369
370/// Writes XML data recursively from JSON, preserving the structure.
371///
372/// # Arguments
373///
374/// * `json` - The JSON object to write as XML.
375/// * `writer` - The writer to output the XML data.
376/// * `tag_name` - The XML tag name.
377///
378/// # Returns
379///
380/// * `Result<(), Error>` - Returns Ok if writing succeeds, or an error if it fails.
381fn write_xml<W: std::io::Write>(json: &Value, writer: &mut W, tag_name: &str) -> Result<(), Error> {
382    match json {
383        Value::Object(map) => {
384            writeln!(writer, "<{}>", tag_name)?;
385            for (key, value) in map {
386                write_xml(value, writer, key)?;
387            }
388            writeln!(writer, "</{}>", tag_name)?;
389        }
390        Value::Array(arr) => {
391            for value in arr {
392                write_xml(value, writer, tag_name)?;
393            }
394        }
395        Value::String(s) => {
396            writeln!(writer, "<{0}>{1}</{0}>", tag_name, s)?;
397        }
398        Value::Number(num) => {
399            writeln!(writer, "<{0}>{1}</{0}>", tag_name, num)?;
400        }
401        Value::Bool(b) => {
402            writeln!(writer, "<{0}>{1}</{0}>", tag_name, b)?;
403        }
404        Value::Null => {
405            writeln!(writer, "<{} />", tag_name)?;
406        }
407    }
408    Ok(())
409}
410
411/// Processes large JSON files by parsing them in chunks.
412///
413/// # Arguments
414///
415/// * `file_path` - The path to the large JSON file.
416///
417/// # Returns
418///
419/// * `Result<(), ParserError>` - Returns Ok if successful, or an error if parsing fails.
420pub fn handle_large_json(file_path: &Path) -> Result<(), ParserError> {
421    let file = fs::File::open(file_path)?;
422    let stream = serde_json::Deserializer::from_reader(file).into_iter::<Value>();
423
424    for value in stream {
425        match value {
426            Ok(json_value) => info!("Parsed chunk: {:?}", json_value),
427            Err(e) => error!("Error parsing chunk: {:?}", e),
428        }
429    }
430    Ok(())
431}
432
433/// Searches for JSON keys by a specific value, returning paths where the value is found.
434///
435/// # Arguments
436///
437/// * `json` - The JSON object to search.
438/// * `target_value` - The target value to search for.
439///
440/// # Returns
441///
442/// * `Vec<String>` - A list of paths where the target value is found.
443pub fn search_by_value(json: &Value, target_value: &str) -> Vec<String> {
444    let mut results = Vec::new();
445    search_recursive(json, target_value, &mut results, "".to_string());
446    results
447}
448
449/// Recursive helper function for `search_by_value`, traversing JSON structure.
450///
451/// # Arguments
452///
453/// * `json` - The JSON object to search.
454/// * `target_value` - The target value to search for.
455/// * `results` - A mutable vector to store the found paths.
456/// * `path` - The current JSON path.
457fn search_recursive(json: &Value, target_value: &str, results: &mut Vec<String>, path: String) {
458    match json {
459        Value::Object(map) => {
460            for (key, value) in map {
461                let new_path = format!("{}.{}", path, key)
462                    .trim_start_matches('.')
463                    .to_string();
464                if value.is_string() && value.as_str().unwrap() == target_value {
465                    results.push(new_path.clone());
466                }
467                search_recursive(value, target_value, results, new_path);
468            }
469        }
470        Value::Array(arr) => {
471            for (index, item) in arr.iter().enumerate() {
472                let new_path = format!("{}[{}]", path, index);
473                search_recursive(item, target_value, results, new_path);
474            }
475        }
476        _ => {}
477    }
478}
479
480/// Retrieves a JSON value by a given path (e.g., "data.items[0].name").
481///
482/// # Arguments
483///
484/// * `json` - The JSON object to search.
485/// * `json_path` - The path to the target value.
486///
487/// # Returns
488///
489/// * `Option<Value>` - The found value or `None` if the path does not exist.
490pub fn get_by_path(json: &Value, json_path: &str) -> Option<Value> {
491    let mut current = json;
492    let parts = json_path.split('.');
493
494    for part in parts {
495        if part.contains('[') && part.contains(']') {
496            let name = &part[..part.find('[').unwrap()];
497            let index: usize = part[part.find('[').unwrap() + 1..part.find(']').unwrap()]
498                .parse()
499                .ok()?;
500            current = current.get(name)?.get(index)?;
501        } else {
502            current = current.get(part)?;
503        }
504    }
505    Some(current.clone())
506}
507
508/// Minifies JSON by removing whitespace.
509///
510/// # Arguments
511///
512/// * `json` - The JSON object to minify.
513///
514/// # Returns
515///
516/// * `String` - The minified JSON string.
517pub fn minify_json(json: &Value) -> String {
518    json.to_string()
519}
520
521/// Displays the structure of JSON, printing each key and nested value with indentation.
522///
523/// # Arguments
524///
525/// * `json` - The JSON object to display.
526pub fn display_structure(json: &Value) {
527    display_structure_recursive(json, 0);
528}
529
530/// Helper function for `display_structure` to recursively print JSON structure with indentation.
531///
532/// # Arguments
533///
534/// * `json` - The JSON object to display.
535/// * `indent` - The current indentation level.
536fn display_structure_recursive(json: &Value, indent: usize) {
537    match json {
538        Value::Object(map) => {
539            for (key, value) in map {
540                println!("{:indent$}{}", "", key, indent = indent);
541                display_structure_recursive(value, indent + 2);
542            }
543        }
544        Value::Array(arr) => {
545            for (index, item) in arr.iter().enumerate() {
546                println!("{:indent$}[{}]", "", index, indent = indent);
547                display_structure_recursive(item, indent + 2);
548            }
549        }
550        _ => {}
551    }
552}