json_five/
utils.rs

/// Translate a byte offset within `doc` into `(line, column, codepoint_offset)`.
///
/// * `line` starts at 1 and only advances after a `'\n'` character.
/// * `column` starts at 1 and counts characters (not bytes) on the current line;
///   a `'\n'` itself is reported at the end of the line it terminates.
/// * `codepoint_offset` is the zero-based index of the character in `doc`.
///
/// An offset equal to `doc.len()` denotes the position just past the final
/// character: it is reported as the final character's position with the
/// column and codepoint offset each advanced by one.
///
/// # Panics
///
/// Panics when `byte_offset` exceeds `doc.len()` or does not fall on a
/// character boundary.
pub(crate) fn get_line_col_char(doc: &str, byte_offset: usize) -> (usize, usize, usize) {
    // The very start of the document needs no scanning (this also covers an empty `doc`).
    if byte_offset == 0 {
        return (1, 1, 0);
    }

    assert!(byte_offset <= doc.len(), "requested byteoffset {} is not less than or equal to doc length ({})", byte_offset, doc.len());

    // One-past-the-end: locate the final character and step one position beyond it.
    if byte_offset == doc.len() {
        let (last_start, _) = doc.char_indices().last().unwrap();
        let (line, col, index) = get_line_col_char(doc, last_start);
        return (line, col + 1, index + 1);
    }

    let mut line: usize = 1;
    let mut col: usize = 0;
    for (index, (start, ch)) in doc.char_indices().enumerate() {
        col += 1;
        if start > byte_offset {
            // We stepped over the requested offset without ever landing on it.
            unreachable!("Byteoffset lands in the middle of a character")
        }
        if start == byte_offset {
            return (line, col, index);
        }
        // Still before the target: a newline moves us to the start of the next line.
        if ch == '\n' {
            line += 1;
            col = 0;
        }
    }
    unreachable!("Reached end of document")
}
39
40
/// Escape `input` so it can be placed between double quotes.
///
/// `"`, `\` and `/` are prefixed with a backslash; newline, carriage return,
/// tab, backspace and form feed become `\n`, `\r`, `\t`, `\b` and `\f`.
/// Every other character is copied through unchanged.
pub (crate) fn escape_double_quoted(input: &str) -> String {
    // Each character expands to at most two, so reserve twice the input length up front.
    let mut out = String::with_capacity(input.len() * 2);

    for ch in input.chars() {
        // The character that follows the backslash, if this one needs escaping.
        let escape_letter = match ch {
            '"' | '\\' | '/' => Some(ch),
            '\n' => Some('n'),
            '\r' => Some('r'),
            '\t' => Some('t'),
            '\u{0008}' => Some('b'),
            '\u{000c}' => Some('f'),
            _ => None,
        };

        if let Some(letter) = escape_letter {
            out.push('\\');
            out.push(letter);
        } else {
            out.push(ch);
        }
    }

    out
}
62
// NOTE(review): presumably kept as the single-quote counterpart of
// `escape_double_quoted` without a caller yet — confirm before dropping the allow.
#[allow(dead_code)]
/// Escape `input` so it can be placed between single quotes.
///
/// `'`, `\` and `/` are prefixed with a backslash; newline, carriage return,
/// tab, backspace and form feed become `\n`, `\r`, `\t`, `\b` and `\f`.
/// Every other character — including `"` — is copied through unchanged.
pub (crate) fn escape_single_quoted(input: &str) -> String {
    // Each character expands to at most two, so reserve twice the input length up front.
    let mut escaped = String::with_capacity(input.len() * 2);
    for c in input.chars() {
        match c {
            // The apostrophe must be emitted as `\'`; emitting `\"` would change the value.
            '\'' => { escaped.push('\\'); escaped.push('\''); }
            '\\' => { escaped.push('\\'); escaped.push('\\'); }
            '\n' => { escaped.push('\\'); escaped.push('n');  }
            '\r' => { escaped.push('\\'); escaped.push('r');  }
            '\t' => { escaped.push('\\'); escaped.push('t');  }
            '/'  => { escaped.push('\\'); escaped.push('/');  }
            '\u{0008}' => { escaped.push('\\'); escaped.push('b'); }
            '\u{000c}' => { escaped.push('\\'); escaped.push('f'); }
            _ => escaped.push(c),
        }
    }
    escaped
}
81
82/// Unescape a JSON5-style (ES5.1-like) string.
83/// - `\uXXXX` => exactly 4 hex digits (no `\u{...}`).
84/// - `\xNN`   => exactly 2 hex digits.
85/// - Single-char escapes like `\n`, `\r`, `\t`, `\b`, `\f`, `\v`, `\0`, `\\`, `\"`, `\/`.
86/// Returns an owned `String` with all escapes resolved.
87pub fn unescape(input: &str) -> Result<String, String> {
88    let mut output = String::with_capacity(input.len());
89    let mut chars = input.chars().peekable();
90
91    while let Some(ch) = chars.next() {
92        if ch != '\\' {
93            // Normal character
94            output.push(ch);
95        } else {
96            // We have a backslash; look at the next char
97            let esc = chars.next().ok_or_else(|| err("Incomplete escape at end of string"))?;
98            match esc {
99                'a' => output.push('\x07'),
100                'n' =>  output.push('\n'),
101                'r' =>  output.push('\r'),
102                't' =>  output.push('\t'),
103                'b' =>  output.push('\x08'), // backspace
104                'f' =>  output.push('\x0C'), // form-feed
105                'v' =>  output.push('\x0B'),
106                '0' =>  output.push('\0'),
107                '\\' => output.push('\\'),
108                '\'' => output.push('\''),
109                '"'  => output.push('\"'),
110                '/'  => output.push('/'), // optional in JSON5
111                '\n' | '\r' | '\u{2028}' | '\u{2029}' => {
112                    output.push(esc);
113                }
114                'x' => {
115                    // \xNN => exactly 2 hex digits
116                    let val = read_hex_digits(&mut chars, 2, "\\x")?;
117                    output.push(char_from_u32(val)?);
118                }
119                'u' => {
120                    // \uXXXX => exactly 4 hex digits (ES5.1)
121                    let val = read_hex_digits(&mut chars, 4, "\\u")?;
122                    output.push(char_from_u32(val)?);
123                }
124                _ => {
125                    // Unknown escape
126                    return Err(format!("Unknown escape character: {}", esc));
127                }
128            }
129        }
130    }
131    Ok(output)
132}
133
134/// Read exactly `count` hex digits from `chars`, returning the combined u32.
135/// The `context` helps produce a clearer error message (like `"\u"` or `"\x"`).
136pub (crate) fn read_hex_digits<I: Iterator<Item = char>>(
137    chars: &mut std::iter::Peekable<I>,
138    count: usize,
139    context: &str
140) -> Result<u32, String> {
141    let mut val = 0u32;
142    for _ in 0..count {
143        let c = chars.next().ok_or_else(|| err(format!("Incomplete {} escape", context)))?;
144        let digit = c
145            .to_digit(16)
146            .ok_or_else(|| err(format!("Invalid hex digit '{}' in {} escape", c, context)))?;
147        val = (val << 4) | digit;
148    }
149    Ok(val)
150}
151
152/// Convert a u32 to a `char`, checking for valid Unicode scalar value range.
153fn char_from_u32(u: u32) -> Result<char, String> {
154    // In ES5.1, `\uXXXX` covers 0..=0xFFFF. If you need to disallow >0xFFFF, insert a check:
155    // if u > 0xFFFF { return Err(err(format!("Code point out of range: U+{:X}", u))); }
156    std::char::from_u32(u).ok_or_else(|| err(format!("Invalid Unicode code point U+{:X}", u)))
157}
158
/// Build the `String` error value used by this module's `Result<_, String>`
/// returns from anything convertible into a `String`.
fn err<S: Into<String>>(message: S) -> String {
    Into::<String>::into(message)
}